From 70363836d5a462851d8d2196eb288d7bdb1ad01d Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Wed, 28 Aug 2024 13:39:06 +0200 Subject: [PATCH 01/15] Correctly mark PyPI package name specs with multiple specifiers as remote libraries (#1725) Correctly mark pypi package name specs with multiple specifiers as remote libraries Fixes this https://github.com/databricks/cli/issues/1728 --- bundle/libraries/local_path.go | 6 ++++-- bundle/libraries/local_path_test.go | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/bundle/libraries/local_path.go b/bundle/libraries/local_path.go index e49562405..6d60d56bc 100644 --- a/bundle/libraries/local_path.go +++ b/bundle/libraries/local_path.go @@ -72,9 +72,11 @@ func IsLibraryLocal(dep string) bool { // ^[a-zA-Z0-9\-_]+: Matches the package name, allowing alphanumeric characters, dashes (-), and underscores (_). // \[.*\])?: Optionally matches any extras specified in square brackets, e.g., [security]. -// ((==|!=|<=|>=|~=|>|<)\d+(\.\d+){0,2}(\.\*)?)?: Optionally matches version specifiers, supporting various operators (==, !=, etc.) followed by a version number (e.g., 2.25.1). +// ((==|!=|<=|>=|~=|>|<)\d+(\.\d+){0,2}(\.\*)?): Optionally matches version specifiers, supporting various operators (==, !=, etc.) followed by a version number (e.g., 2.25.1). +// ,?: Optionally matches a comma (,) at the end of the specifier which is used to separate multiple specifiers. +// There can be multiple version specifiers separated by commas or no specifiers. // Spec for package name and version specifier: https://pip.pypa.io/en/stable/reference/requirement-specifiers/ -var packageRegex = regexp.MustCompile(`^[a-zA-Z0-9\-_]+\s?(\[.*\])?\s?((==|!=|<=|>=|~=|==|>|<)\s?\d+(\.\d+){0,2}(\.\*)?)?$`) +var packageRegex = regexp.MustCompile(`^[a-zA-Z0-9\-_]+\s?(\[.*\])?\s?((==|!=|<=|>=|~=|==|>|<)\s?\d+(\.\d+){0,2}(\.\*)?,?)*$`) func isPackage(name string) bool { if packageRegex.MatchString(name) { diff --git a/bundle/libraries/local_path_test.go b/bundle/libraries/local_path_test.go index 667d64ec8..dc157cad8 100644 --- a/bundle/libraries/local_path_test.go +++ b/bundle/libraries/local_path_test.go @@ -62,6 +62,8 @@ func TestIsLibraryLocal(t *testing.T) { {path: "beautifulsoup4 ~= 4.12.3", expected: false}, {path: "beautifulsoup4[security, tests]", expected: false}, {path: "beautifulsoup4[security, tests] ~= 4.12.3", expected: false}, + {path: "beautifulsoup4>=1.0.0,<2.0.0", expected: false}, + {path: "beautifulsoup4>=1.0.0,~=1.2.0,<2.0.0", expected: false}, {path: "https://github.com/pypa/pip/archive/22.0.2.zip", expected: false}, {path: "pip @ https://github.com/pypa/pip/archive/22.0.2.zip", expected: false}, {path: "requests [security] @ https://github.com/psf/requests/archive/refs/heads/main.zip", expected: false}, From 85459c1963e6e86f3ece1029caae51156f6b87f6 Mon Sep 17 00:00:00 2001 From: "Lennart Kats (databricks)" Date: Wed, 28 Aug 2024 14:14:19 +0200 Subject: [PATCH 02/15] Improve error handling for /Volumes paths in mode: development (#1716) ## Changes * Provide a more helpful error when using an artifact_path based on /Volumes * Allow the use of short_names in /Volumes paths ## Example cases Example of a valid /Volumes artifact_path: * `artifact_path: /Volumes/catalog/schema/${workspace.current_user.short_name}/libs` Example of an invalid /Volumes path (when using `mode: development`): * `artifact_path: /Volumes/catalog/schema/libs` * Resulting error: `artifact_path should contain the current username or ${workspace.current_user.short_name} to ensure uniqueness when using 'mode: development'` --- bundle/config/mutator/process_target_mode.go | 35 +++++++++++++------ .../mutator/process_target_mode_test.go | 12 ++++++- 2 files changed, 35 insertions(+), 12 deletions(-) diff --git a/bundle/config/mutator/process_target_mode.go b/bundle/config/mutator/process_target_mode.go index 92ed28689..70382f054 100644 --- a/bundle/config/mutator/process_target_mode.go +++ b/bundle/config/mutator/process_target_mode.go @@ -64,6 +64,7 @@ func transformDevelopmentMode(ctx context.Context, b *bundle.Bundle) { } func validateDevelopmentMode(b *bundle.Bundle) diag.Diagnostics { + var diags diag.Diagnostics p := b.Config.Presets u := b.Config.Workspace.CurrentUser @@ -74,44 +75,56 @@ func validateDevelopmentMode(b *bundle.Bundle) diag.Diagnostics { // status to UNPAUSED at the level of an individual object, whic hwas // historically allowed.) if p.TriggerPauseStatus == config.Unpaused { - return diag.Diagnostics{{ + diags = diags.Append(diag.Diagnostic{ Severity: diag.Error, Summary: "target with 'mode: development' cannot set trigger pause status to UNPAUSED by default", Locations: []dyn.Location{b.Config.GetLocation("presets.trigger_pause_status")}, - }} + }) } // Make sure this development copy has unique names and paths to avoid conflicts if path := findNonUserPath(b); path != "" { - return diag.Errorf("%s must start with '~/' or contain the current username when using 'mode: development'", path) + if path == "artifact_path" && strings.HasPrefix(b.Config.Workspace.ArtifactPath, "/Volumes") { + // For Volumes paths we recommend including the current username as a substring + diags = diags.Extend(diag.Errorf("%s should contain the current username or ${workspace.current_user.short_name} to ensure uniqueness when using 'mode: development'", path)) + } else { + // For non-Volumes paths recommend simply putting things in the home folder + diags = diags.Extend(diag.Errorf("%s must start with '~/' or contain the current username to ensure uniqueness when using 'mode: development'", path)) + } } if p.NamePrefix != "" && !strings.Contains(p.NamePrefix, u.ShortName) && !strings.Contains(p.NamePrefix, u.UserName) { // Resources such as pipelines require a unique name, e.g. '[dev steve] my_pipeline'. // For this reason we require the name prefix to contain the current username; // it's a pitfall for users if they don't include it and later find out that // only a single user can do development deployments. - return diag.Diagnostics{{ + diags = diags.Append(diag.Diagnostic{ Severity: diag.Error, Summary: "prefix should contain the current username or ${workspace.current_user.short_name} to ensure uniqueness when using 'mode: development'", Locations: []dyn.Location{b.Config.GetLocation("presets.name_prefix")}, - }} + }) } - return nil + return diags } +// findNonUserPath finds the first workspace path such as root_path that doesn't +// contain the current username or current user's shortname. func findNonUserPath(b *bundle.Bundle) string { - username := b.Config.Workspace.CurrentUser.UserName + containsName := func(path string) bool { + username := b.Config.Workspace.CurrentUser.UserName + shortname := b.Config.Workspace.CurrentUser.ShortName + return strings.Contains(path, username) || strings.Contains(path, shortname) + } - if b.Config.Workspace.RootPath != "" && !strings.Contains(b.Config.Workspace.RootPath, username) { + if b.Config.Workspace.RootPath != "" && !containsName(b.Config.Workspace.RootPath) { return "root_path" } - if b.Config.Workspace.StatePath != "" && !strings.Contains(b.Config.Workspace.StatePath, username) { + if b.Config.Workspace.StatePath != "" && !containsName(b.Config.Workspace.StatePath) { return "state_path" } - if b.Config.Workspace.FilePath != "" && !strings.Contains(b.Config.Workspace.FilePath, username) { + if b.Config.Workspace.FilePath != "" && !containsName(b.Config.Workspace.FilePath) { return "file_path" } - if b.Config.Workspace.ArtifactPath != "" && !strings.Contains(b.Config.Workspace.ArtifactPath, username) { + if b.Config.Workspace.ArtifactPath != "" && !containsName(b.Config.Workspace.ArtifactPath) { return "artifact_path" } return "" diff --git a/bundle/config/mutator/process_target_mode_test.go b/bundle/config/mutator/process_target_mode_test.go index 1c8671b4c..42f1929c8 100644 --- a/bundle/config/mutator/process_target_mode_test.go +++ b/bundle/config/mutator/process_target_mode_test.go @@ -230,10 +230,20 @@ func TestValidateDevelopmentMode(t *testing.T) { diags := validateDevelopmentMode(b) require.NoError(t, diags.Error()) + // Test with /Volumes path + b = mockBundle(config.Development) + b.Config.Workspace.ArtifactPath = "/Volumes/catalog/schema/lennart/libs" + diags = validateDevelopmentMode(b) + require.NoError(t, diags.Error()) + b.Config.Workspace.ArtifactPath = "/Volumes/catalog/schema/libs" + diags = validateDevelopmentMode(b) + require.ErrorContains(t, diags.Error(), "artifact_path should contain the current username or ${workspace.current_user.short_name} to ensure uniqueness when using 'mode: development'") + // Test with a bundle that has a non-user path + b = mockBundle(config.Development) b.Config.Workspace.RootPath = "/Shared/.bundle/x/y/state" diags = validateDevelopmentMode(b) - require.ErrorContains(t, diags.Error(), "root_path") + require.ErrorContains(t, diags.Error(), "root_path must start with '~/' or contain the current username to ensure uniqueness when using 'mode: development'") // Test with a bundle that has an unpaused trigger pause status b = mockBundle(config.Development) From 7dcc791b05905a0ac78345651320969212f0123f Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Wed, 28 Aug 2024 15:09:07 +0200 Subject: [PATCH 03/15] [Release] Release v0.227.1 (#1729) CLI: * Disable prompt for storage-credentials get command ([#1723](https://github.com/databricks/cli/pull/1723)). Bundles: * Do not treat empty path as a local path ([#1717](https://github.com/databricks/cli/pull/1717)). * Correctly mark PyPI package name specs with multiple specifiers as remote libraries ([#1725](https://github.com/databricks/cli/pull/1725)). * Improve error handling for /Volumes paths in mode: development ([#1716](https://github.com/databricks/cli/pull/1716)). Internal: * Ignore CLI version check on development builds of the CLI ([#1714](https://github.com/databricks/cli/pull/1714)). API Changes: * Added `databricks resource-quotas` command group. * Added `databricks policy-compliance-for-clusters` command group. * Added `databricks policy-compliance-for-jobs` command group. OpenAPI commit 3eae49b444cac5a0118a3503e5b7ecef7f96527a (2024-08-21) Dependency updates: * Bump github.com/databricks/databricks-sdk-go from 0.44.0 to 0.45.0 ([#1719](https://github.com/databricks/cli/pull/1719)). * Revert hc-install version to 0.7.0 ([#1711](https://github.com/databricks/cli/pull/1711)). --- CHANGELOG.md | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 88a62d098..fac7d597d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,28 @@ # Version changelog +## [Release] Release v0.227.1 + +CLI: + * Disable prompt for storage-credentials get command ([#1723](https://github.com/databricks/cli/pull/1723)). + +Bundles: + * Do not treat empty path as a local path ([#1717](https://github.com/databricks/cli/pull/1717)). + * Correctly mark PyPI package name specs with multiple specifiers as remote libraries ([#1725](https://github.com/databricks/cli/pull/1725)). + * Improve error handling for /Volumes paths in mode: development ([#1716](https://github.com/databricks/cli/pull/1716)). + +Internal: + * Ignore CLI version check on development builds of the CLI ([#1714](https://github.com/databricks/cli/pull/1714)). + +API Changes: + * Added `databricks resource-quotas` command group. + * Added `databricks policy-compliance-for-clusters` command group. + * Added `databricks policy-compliance-for-jobs` command group. + +OpenAPI commit 3eae49b444cac5a0118a3503e5b7ecef7f96527a (2024-08-21) +Dependency updates: + * Bump github.com/databricks/databricks-sdk-go from 0.44.0 to 0.45.0 ([#1719](https://github.com/databricks/cli/pull/1719)). + * Revert hc-install version to 0.7.0 ([#1711](https://github.com/databricks/cli/pull/1711)). + ## [Release] Release v0.227.0 CLI: From 43ace69bb955a445885d4477a55de88c237a05ba Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Thu, 29 Aug 2024 14:47:44 +0200 Subject: [PATCH 04/15] Consider serverless clusters as compatible for Python wheel tasks (#1733) ## Changes Consider serverless clusters as compatible for Python wheel tasks. Fixes a `Python wheel tasks require compute with DBR 13.3+ to include local libraries` warning shown for serverless clusters --- bundle/python/warning.go | 20 ++++++++++++++++---- bundle/python/warning_test.go | 4 +++- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/bundle/python/warning.go b/bundle/python/warning.go index d53796d73..0e9d8bef0 100644 --- a/bundle/python/warning.go +++ b/bundle/python/warning.go @@ -2,6 +2,7 @@ package python import ( "context" + "strconv" "strings" "github.com/databricks/cli/bundle" @@ -38,7 +39,7 @@ func hasIncompatibleWheelTasks(ctx context.Context, b *bundle.Bundle) bool { tasks := libraries.FindTasksWithLocalLibraries(b) for _, task := range tasks { if task.NewCluster != nil { - if lowerThanExpectedVersion(ctx, task.NewCluster.SparkVersion) { + if lowerThanExpectedVersion(task.NewCluster.SparkVersion) { return true } } @@ -47,7 +48,7 @@ func hasIncompatibleWheelTasks(ctx context.Context, b *bundle.Bundle) bool { for _, job := range b.Config.Resources.Jobs { for _, cluster := range job.JobClusters { if task.JobClusterKey == cluster.JobClusterKey && cluster.NewCluster.SparkVersion != "" { - if lowerThanExpectedVersion(ctx, cluster.NewCluster.SparkVersion) { + if lowerThanExpectedVersion(cluster.NewCluster.SparkVersion) { return true } } @@ -64,7 +65,7 @@ func hasIncompatibleWheelTasks(ctx context.Context, b *bundle.Bundle) bool { return false } - if lowerThanExpectedVersion(ctx, version) { + if lowerThanExpectedVersion(version) { return true } } @@ -73,7 +74,7 @@ func hasIncompatibleWheelTasks(ctx context.Context, b *bundle.Bundle) bool { return false } -func lowerThanExpectedVersion(ctx context.Context, sparkVersion string) bool { +func lowerThanExpectedVersion(sparkVersion string) bool { parts := strings.Split(sparkVersion, ".") if len(parts) < 2 { return false @@ -82,6 +83,17 @@ func lowerThanExpectedVersion(ctx context.Context, sparkVersion string) bool { if parts[1][0] == 'x' { // treat versions like 13.x as the very latest minor (13.99) parts[1] = "99" } + + // if any of the version parts are not numbers, we can't compare + // so consider it as compatible version + if _, err := strconv.Atoi(parts[0]); err != nil { + return false + } + + if _, err := strconv.Atoi(parts[1]); err != nil { + return false + } + v := "v" + parts[0] + "." + parts[1] return semver.Compare(v, "v13.1") < 0 } diff --git a/bundle/python/warning_test.go b/bundle/python/warning_test.go index b2296392b..a5ab75632 100644 --- a/bundle/python/warning_test.go +++ b/bundle/python/warning_test.go @@ -344,6 +344,8 @@ func TestSparkVersionLowerThanExpected(t *testing.T) { "14.1.x-scala2.12": false, "13.x-snapshot-scala-2.12": false, "13.x-rc-scala-2.12": false, + "client.1.10-scala2.12": false, + "latest-stable-gpu-scala2.11": false, "10.4.x-aarch64-photon-scala2.12": true, "10.4.x-scala2.12": true, "13.0.x-scala2.12": true, @@ -351,7 +353,7 @@ func TestSparkVersionLowerThanExpected(t *testing.T) { } for k, v := range testCases { - result := lowerThanExpectedVersion(context.Background(), k) + result := lowerThanExpectedVersion(k) require.Equal(t, v, result, k) } } From 0f4891f0fef96fab6bffce7572caa2b97615911a Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Thu, 29 Aug 2024 15:02:34 +0200 Subject: [PATCH 05/15] Add `dyn.Time` to box a timestamp with its original string value (#1732) ## Changes If not explicitly quoted, the YAML loader interprets a value like `2024-08-29` as a timestamp. Such a value is usually intended to be a string instead. Our normalization logic was not able to turn a time value back into the original string. This change boxes the time value to include its original string representation. Normalization of one of these values into a string can now use the original input value. ## Tests Unit tests in `libs/dyn/convert`. --- libs/dyn/convert/normalize.go | 2 + libs/dyn/convert/normalize_test.go | 8 ++++ libs/dyn/kind.go | 3 +- libs/dyn/merge/override_test.go | 12 +++--- libs/dyn/time.go | 62 ++++++++++++++++++++++++++++++ libs/dyn/time_test.go | 41 ++++++++++++++++++++ libs/dyn/value.go | 3 +- libs/dyn/value_underlying.go | 7 ++-- libs/dyn/value_underlying_test.go | 2 +- libs/dyn/yamlloader/loader.go | 15 ++------ libs/dyn/yamlsaver/saver.go | 2 +- libs/dyn/yamlsaver/saver_test.go | 9 +++-- 12 files changed, 136 insertions(+), 30 deletions(-) create mode 100644 libs/dyn/time.go create mode 100644 libs/dyn/time_test.go diff --git a/libs/dyn/convert/normalize.go b/libs/dyn/convert/normalize.go index c80a914f1..bc80a150a 100644 --- a/libs/dyn/convert/normalize.go +++ b/libs/dyn/convert/normalize.go @@ -267,6 +267,8 @@ func (n normalizeOptions) normalizeString(typ reflect.Type, src dyn.Value, path out = strconv.FormatInt(src.MustInt(), 10) case dyn.KindFloat: out = strconv.FormatFloat(src.MustFloat(), 'f', -1, 64) + case dyn.KindTime: + out = src.MustTime().String() case dyn.KindNil: // Return a warning if the field is present but has a null value. return dyn.InvalidValue, diags.Append(nullWarning(dyn.KindString, src, path)) diff --git a/libs/dyn/convert/normalize_test.go b/libs/dyn/convert/normalize_test.go index c2256615e..4b2a3c189 100644 --- a/libs/dyn/convert/normalize_test.go +++ b/libs/dyn/convert/normalize_test.go @@ -569,6 +569,14 @@ func TestNormalizeStringFromFloat(t *testing.T) { assert.Equal(t, dyn.NewValue("1.2", vin.Locations()), vout) } +func TestNormalizeStringFromTime(t *testing.T) { + var typ string + vin := dyn.NewValue(dyn.MustTime("2024-08-29"), []dyn.Location{{File: "file", Line: 1, Column: 1}}) + vout, err := Normalize(&typ, vin) + assert.Empty(t, err) + assert.Equal(t, dyn.NewValue("2024-08-29", vin.Locations()), vout) +} + func TestNormalizeStringError(t *testing.T) { var typ string vin := dyn.V(map[string]dyn.Value{"an": dyn.V("error")}) diff --git a/libs/dyn/kind.go b/libs/dyn/kind.go index 9d507fbc5..9c0c14423 100644 --- a/libs/dyn/kind.go +++ b/libs/dyn/kind.go @@ -2,7 +2,6 @@ package dyn import ( "fmt" - "time" ) type Kind int @@ -34,7 +33,7 @@ func kindOf(v any) Kind { return KindInt case float32, float64: return KindFloat - case time.Time: + case Time: return KindTime case nil: return KindNil diff --git a/libs/dyn/merge/override_test.go b/libs/dyn/merge/override_test.go index 9d41a526e..264c32e5e 100644 --- a/libs/dyn/merge/override_test.go +++ b/libs/dyn/merge/override_test.go @@ -83,16 +83,16 @@ func TestOverride_Primitive(t *testing.T) { { name: "time (updated)", state: visitorState{updated: []string{"root"}}, - left: dyn.NewValue(time.UnixMilli(10000), []dyn.Location{leftLocation}), - right: dyn.NewValue(time.UnixMilli(10001), []dyn.Location{rightLocation}), - expected: dyn.NewValue(time.UnixMilli(10001), []dyn.Location{rightLocation}), + left: dyn.NewValue(dyn.FromTime(time.UnixMilli(10000)), []dyn.Location{leftLocation}), + right: dyn.NewValue(dyn.FromTime(time.UnixMilli(10001)), []dyn.Location{rightLocation}), + expected: dyn.NewValue(dyn.FromTime(time.UnixMilli(10001)), []dyn.Location{rightLocation}), }, { name: "time (not updated)", state: visitorState{}, - left: dyn.NewValue(time.UnixMilli(10000), []dyn.Location{leftLocation}), - right: dyn.NewValue(time.UnixMilli(10000), []dyn.Location{rightLocation}), - expected: dyn.NewValue(time.UnixMilli(10000), []dyn.Location{leftLocation}), + left: dyn.NewValue(dyn.FromTime(time.UnixMilli(10000)), []dyn.Location{leftLocation}), + right: dyn.NewValue(dyn.FromTime(time.UnixMilli(10000)), []dyn.Location{rightLocation}), + expected: dyn.NewValue(dyn.FromTime(time.UnixMilli(10000)), []dyn.Location{leftLocation}), }, { name: "different types (updated)", diff --git a/libs/dyn/time.go b/libs/dyn/time.go new file mode 100644 index 000000000..b3b3de12c --- /dev/null +++ b/libs/dyn/time.go @@ -0,0 +1,62 @@ +package dyn + +import ( + "fmt" + "time" +) + +// Time represents a time-like primitive value. +// +// It represents a timestamp and includes the original string value +// that was parsed to create the timestamp. This makes it possible +// to coalesce a value that YAML interprets as a timestamp back into +// a string without losing information. +type Time struct { + t time.Time + s string +} + +// NewTime creates a new Time from the given string. +func NewTime(str string) (Time, error) { + // Try a couple of layouts + for _, layout := range []string{ + "2006-1-2T15:4:5.999999999Z07:00", // RCF3339Nano with short date fields. + "2006-1-2t15:4:5.999999999Z07:00", // RFC3339Nano with short date fields and lower-case "t". + "2006-1-2 15:4:5.999999999", // space separated with no time zone + "2006-1-2", // date only + } { + t, terr := time.Parse(layout, str) + if terr == nil { + return Time{t: t, s: str}, nil + } + } + + return Time{}, fmt.Errorf("invalid time value: %q", str) +} + +// MustTime creates a new Time from the given string. +// It panics if the string cannot be parsed. +func MustTime(str string) Time { + t, err := NewTime(str) + if err != nil { + panic(err) + } + return t +} + +// FromTime creates a new Time from the given time.Time. +// It uses the RFC3339Nano format for its string representation. +// This guarantees that it can roundtrip into a string without losing information. +func FromTime(t time.Time) Time { + return Time{t: t, s: t.Format(time.RFC3339Nano)} +} + +// Time returns the time.Time value. +func (t Time) Time() time.Time { + return t.t +} + +// String returns the original string value that was parsed to create the timestamp. +func (t Time) String() string { + return t.s +} diff --git a/libs/dyn/time_test.go b/libs/dyn/time_test.go new file mode 100644 index 000000000..653d6855f --- /dev/null +++ b/libs/dyn/time_test.go @@ -0,0 +1,41 @@ +package dyn_test + +import ( + "testing" + "time" + + "github.com/databricks/cli/libs/dyn" + assert "github.com/databricks/cli/libs/dyn/dynassert" +) + +func TestTimeValid(t *testing.T) { + for _, tc := range []string{ + "2024-08-29", + "2024-01-15T12:34:56.789012345Z", + } { + tm, err := dyn.NewTime(tc) + if assert.NoError(t, err) { + assert.NotEqual(t, time.Time{}, tm.Time()) + assert.Equal(t, tc, tm.String()) + } + } +} + +func TestTimeInvalid(t *testing.T) { + tm, err := dyn.NewTime("invalid") + assert.Error(t, err) + assert.Equal(t, dyn.Time{}, tm) +} + +func TestTimeFromTime(t *testing.T) { + tref := time.Now() + t1 := dyn.FromTime(tref) + + // Verify that the underlying value is the same. + assert.Equal(t, tref, t1.Time()) + + // Verify that the string representation can be used to construct the same. + t2, err := dyn.NewTime(t1.String()) + assert.NoError(t, err) + assert.True(t, t1.Time().Equal(t2.Time())) +} diff --git a/libs/dyn/value.go b/libs/dyn/value.go index 2aed2f6cd..81524db7e 100644 --- a/libs/dyn/value.go +++ b/libs/dyn/value.go @@ -127,7 +127,8 @@ func (v Value) AsAny() any { case KindFloat: return v.v case KindTime: - return v.v + t := v.v.(Time) + return t.Time() default: // Panic because we only want to deal with known types. panic(fmt.Sprintf("invalid kind: %d", v.k)) diff --git a/libs/dyn/value_underlying.go b/libs/dyn/value_underlying.go index 2f0f26a1f..0a867375d 100644 --- a/libs/dyn/value_underlying.go +++ b/libs/dyn/value_underlying.go @@ -2,7 +2,6 @@ package dyn import ( "fmt" - "time" ) // AsMap returns the underlying mapping if this value is a map, @@ -123,14 +122,14 @@ func (v Value) MustFloat() float64 { // AsTime returns the underlying time if this value is a time, // the zero value and false otherwise. -func (v Value) AsTime() (time.Time, bool) { - vv, ok := v.v.(time.Time) +func (v Value) AsTime() (Time, bool) { + vv, ok := v.v.(Time) return vv, ok } // MustTime returns the underlying time if this value is a time, // panics otherwise. -func (v Value) MustTime() time.Time { +func (v Value) MustTime() Time { vv, ok := v.AsTime() if !ok || v.k != KindTime { panic(fmt.Sprintf("expected kind %s, got %s", KindTime, v.k)) diff --git a/libs/dyn/value_underlying_test.go b/libs/dyn/value_underlying_test.go index e35cde582..73baeeeb2 100644 --- a/libs/dyn/value_underlying_test.go +++ b/libs/dyn/value_underlying_test.go @@ -143,7 +143,7 @@ func TestValueUnderlyingFloat(t *testing.T) { } func TestValueUnderlyingTime(t *testing.T) { - v := dyn.V(time.Now()) + v := dyn.V(dyn.FromTime(time.Now())) vv1, ok := v.AsTime() assert.True(t, ok) diff --git a/libs/dyn/yamlloader/loader.go b/libs/dyn/yamlloader/loader.go index fbb52b504..c3e8d0810 100644 --- a/libs/dyn/yamlloader/loader.go +++ b/libs/dyn/yamlloader/loader.go @@ -5,7 +5,6 @@ import ( "math" "strconv" "strings" - "time" "github.com/databricks/cli/libs/dyn" "gopkg.in/yaml.v3" @@ -207,17 +206,9 @@ func (d *loader) loadScalar(node *yaml.Node, loc dyn.Location) (dyn.Value, error case "!!null": return dyn.NewValue(nil, []dyn.Location{loc}), nil case "!!timestamp": - // Try a couple of layouts - for _, layout := range []string{ - "2006-1-2T15:4:5.999999999Z07:00", // RCF3339Nano with short date fields. - "2006-1-2t15:4:5.999999999Z07:00", // RFC3339Nano with short date fields and lower-case "t". - "2006-1-2 15:4:5.999999999", // space separated with no time zone - "2006-1-2", // date only - } { - t, terr := time.Parse(layout, node.Value) - if terr == nil { - return dyn.NewValue(t, []dyn.Location{loc}), nil - } + t, err := dyn.NewTime(node.Value) + if err == nil { + return dyn.NewValue(t, []dyn.Location{loc}), nil } return dyn.InvalidValue, errorf(loc, "invalid timestamp value: %v", node.Value) default: diff --git a/libs/dyn/yamlsaver/saver.go b/libs/dyn/yamlsaver/saver.go index fe4cfb854..f4c7157f2 100644 --- a/libs/dyn/yamlsaver/saver.go +++ b/libs/dyn/yamlsaver/saver.go @@ -129,7 +129,7 @@ func (s *saver) toYamlNodeWithStyle(v dyn.Value, style yaml.Style) (*yaml.Node, case dyn.KindFloat: return &yaml.Node{Kind: yaml.ScalarNode, Value: fmt.Sprint(v.MustFloat()), Style: style}, nil case dyn.KindTime: - return &yaml.Node{Kind: yaml.ScalarNode, Value: v.MustTime().UTC().String(), Style: style}, nil + return &yaml.Node{Kind: yaml.ScalarNode, Value: v.MustTime().String(), Style: style}, nil default: // Panic because we only want to deal with known types. panic(fmt.Sprintf("invalid kind: %d", v.Kind())) diff --git a/libs/dyn/yamlsaver/saver_test.go b/libs/dyn/yamlsaver/saver_test.go index 387090104..aa481c20b 100644 --- a/libs/dyn/yamlsaver/saver_test.go +++ b/libs/dyn/yamlsaver/saver_test.go @@ -2,10 +2,10 @@ package yamlsaver import ( "testing" - "time" "github.com/databricks/cli/libs/dyn" assert "github.com/databricks/cli/libs/dyn/dynassert" + "github.com/stretchr/testify/require" "gopkg.in/yaml.v3" ) @@ -45,11 +45,14 @@ func TestMarshalBoolValue(t *testing.T) { } func TestMarshalTimeValue(t *testing.T) { + tm, err := dyn.NewTime("1970-01-01") + require.NoError(t, err) + s := NewSaver() - var timeValue = dyn.V(time.Unix(0, 0)) + var timeValue = dyn.V(tm) v, err := s.toYamlNode(timeValue) assert.NoError(t, err) - assert.Equal(t, "1970-01-01 00:00:00 +0000 UTC", v.Value) + assert.Equal(t, "1970-01-01", v.Value) assert.Equal(t, yaml.ScalarNode, v.Kind) } From 5fac7edcdf5f3e371920c2907a311cd1564bebf8 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Thu, 29 Aug 2024 16:41:12 +0200 Subject: [PATCH 06/15] Pass along $AZURE_CONFIG_FILE to Terraform process (#1734) ## Changes This ensures that the CLI and Terraform can both use an Azure CLI session configured under a non-standard path. This is the default behavior on Azure DevOps when using the AzureCLI@2 task. Fixes #1722. ## Tests Unit test. --- bundle/deploy/terraform/init.go | 7 +++++++ bundle/deploy/terraform/init_test.go | 19 ++++++++++--------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/bundle/deploy/terraform/init.go b/bundle/deploy/terraform/init.go index e7f720d08..7d75ee8a8 100644 --- a/bundle/deploy/terraform/init.go +++ b/bundle/deploy/terraform/init.go @@ -111,6 +111,13 @@ func inheritEnvVars(ctx context.Context, environ map[string]string) error { environ["PATH"] = path } + // Include $AZURE_CONFIG_FILE in set of environment variables to pass along. + // This is set in Azure DevOps by the AzureCLI@2 task. + azureConfigFile, ok := env.Lookup(ctx, "AZURE_CONFIG_FILE") + if ok { + environ["AZURE_CONFIG_FILE"] = azureConfigFile + } + // Include $TF_CLI_CONFIG_FILE to override terraform provider in development. // See: https://developer.hashicorp.com/terraform/cli/config/config-file#explicit-installation-method-configuration devConfigFile, ok := env.Lookup(ctx, "TF_CLI_CONFIG_FILE") diff --git a/bundle/deploy/terraform/init_test.go b/bundle/deploy/terraform/init_test.go index 94e47dbc1..450e7eb6a 100644 --- a/bundle/deploy/terraform/init_test.go +++ b/bundle/deploy/terraform/init_test.go @@ -269,19 +269,20 @@ func TestSetUserAgentExtraEnvVar(t *testing.T) { } func TestInheritEnvVars(t *testing.T) { - env := map[string]string{} - t.Setenv("HOME", "/home/testuser") t.Setenv("PATH", "/foo:/bar") t.Setenv("TF_CLI_CONFIG_FILE", "/tmp/config.tfrc") + t.Setenv("AZURE_CONFIG_FILE", "/tmp/foo/bar") - err := inheritEnvVars(context.Background(), env) - - require.NoError(t, err) - - require.Equal(t, env["HOME"], "/home/testuser") - require.Equal(t, env["PATH"], "/foo:/bar") - require.Equal(t, env["TF_CLI_CONFIG_FILE"], "/tmp/config.tfrc") + ctx := context.Background() + env := map[string]string{} + err := inheritEnvVars(ctx, env) + if assert.NoError(t, err) { + assert.Equal(t, "/home/testuser", env["HOME"]) + assert.Equal(t, "/foo:/bar", env["PATH"]) + assert.Equal(t, "/tmp/config.tfrc", env["TF_CLI_CONFIG_FILE"]) + assert.Equal(t, "/tmp/foo/bar", env["AZURE_CONFIG_FILE"]) + } } func TestSetUserProfileFromInheritEnvVars(t *testing.T) { From 2e000f1ebd0f22933bb2fd64ee7a9f44cb53b5d8 Mon Sep 17 00:00:00 2001 From: "Lennart Kats (databricks)" Date: Thu, 29 Aug 2024 21:07:21 +0200 Subject: [PATCH 07/15] Use materialized views in the default-sql template (#1709) ## Changes Materialized views now support `CREATE OR REPLACE` ([docs](https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-create-materialized-view.html))! This makes it possible to use them with Workflows in DABs.This PR updates the template to use a materialized view rather than a regular view. ## Tests Manually validated in production. --- .../template/{{.project_name}}/src/orders_daily.sql.tmpl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/libs/template/templates/default-sql/template/{{.project_name}}/src/orders_daily.sql.tmpl b/libs/template/templates/default-sql/template/{{.project_name}}/src/orders_daily.sql.tmpl index 8a9d12ea8..e5ceb77a9 100644 --- a/libs/template/templates/default-sql/template/{{.project_name}}/src/orders_daily.sql.tmpl +++ b/libs/template/templates/default-sql/template/{{.project_name}}/src/orders_daily.sql.tmpl @@ -1,10 +1,9 @@ -- This query is executed using Databricks Workflows (see resources/{{.project_name}}_sql_job.yml) -{{- /* We can't use a materialized view here since they don't support 'create or refresh' yet.*/}} USE CATALOG {{"{{"}}catalog{{"}}"}}; USE IDENTIFIER({{"{{"}}schema{{"}}"}}); -CREATE OR REPLACE VIEW +CREATE OR REPLACE MATERIALIZED VIEW orders_daily AS SELECT order_date, count(*) AS number_of_orders From ed4a4585c07602e84a8ce97336688463d9833527 Mon Sep 17 00:00:00 2001 From: "Lennart Kats (databricks)" Date: Fri, 30 Aug 2024 09:32:10 +0200 Subject: [PATCH 08/15] Update templates to latest LTS DBR (#1715) ## Changes This updates the templates to use the latest DBR 15 LTS version. - [x] DB Connect 15.4 must be released + validated before this can go out --- libs/template/templates/dbt-sql/library/versions.tmpl | 4 ++-- libs/template/templates/default-python/library/versions.tmpl | 4 ++-- libs/template/templates/default-sql/library/versions.tmpl | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/libs/template/templates/dbt-sql/library/versions.tmpl b/libs/template/templates/dbt-sql/library/versions.tmpl index f9a879d25..7d0c88e7d 100644 --- a/libs/template/templates/dbt-sql/library/versions.tmpl +++ b/libs/template/templates/dbt-sql/library/versions.tmpl @@ -1,7 +1,7 @@ {{define "latest_lts_dbr_version" -}} - 13.3.x-scala2.12 + 15.4.x-scala2.12 {{- end}} {{define "latest_lts_db_connect_version_spec" -}} - >=13.3,<13.4 + >=15.4,<15.5 {{- end}} diff --git a/libs/template/templates/default-python/library/versions.tmpl b/libs/template/templates/default-python/library/versions.tmpl index f9a879d25..7d0c88e7d 100644 --- a/libs/template/templates/default-python/library/versions.tmpl +++ b/libs/template/templates/default-python/library/versions.tmpl @@ -1,7 +1,7 @@ {{define "latest_lts_dbr_version" -}} - 13.3.x-scala2.12 + 15.4.x-scala2.12 {{- end}} {{define "latest_lts_db_connect_version_spec" -}} - >=13.3,<13.4 + >=15.4,<15.5 {{- end}} diff --git a/libs/template/templates/default-sql/library/versions.tmpl b/libs/template/templates/default-sql/library/versions.tmpl index f9a879d25..7d0c88e7d 100644 --- a/libs/template/templates/default-sql/library/versions.tmpl +++ b/libs/template/templates/default-sql/library/versions.tmpl @@ -1,7 +1,7 @@ {{define "latest_lts_dbr_version" -}} - 13.3.x-scala2.12 + 15.4.x-scala2.12 {{- end}} {{define "latest_lts_db_connect_version_spec" -}} - >=13.3,<13.4 + >=15.4,<15.5 {{- end}} From 70ce80251853ae6443ea280ba6b2bc4614c7267a Mon Sep 17 00:00:00 2001 From: Gleb Kanterov Date: Fri, 30 Aug 2024 15:29:00 +0200 Subject: [PATCH 09/15] PythonMutator: preserve normalize diagnostics (#1735) ## Changes Preserve diagnostics if there are any errors or warnings when PythonMutator normalizes output. If anything goes wrong during conversion, diagnostics contain the relevant location and path. ## Tests Unit tests --- .../config/mutator/python/python_mutator.go | 35 +++++++++++-------- .../mutator/python/python_mutator_test.go | 20 ++++++++++- 2 files changed, 39 insertions(+), 16 deletions(-) diff --git a/bundle/config/mutator/python/python_mutator.go b/bundle/config/mutator/python/python_mutator.go index 4f44df0a9..bf30f0704 100644 --- a/bundle/config/mutator/python/python_mutator.go +++ b/bundle/config/mutator/python/python_mutator.go @@ -205,10 +205,8 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir string, r return dyn.InvalidValue, diag.Errorf("failed to load diagnostics: %s", pythonDiagnosticsErr) } - output, err := loadOutputFile(rootPath, outputPath) - if err != nil { - return dyn.InvalidValue, diag.Errorf("failed to load Python mutator output: %s", err) - } + output, outputDiags := loadOutputFile(rootPath, outputPath) + pythonDiagnostics = pythonDiagnostics.Extend(outputDiags) // we pass through pythonDiagnostic because it contains warnings return output, pythonDiagnostics @@ -225,10 +223,10 @@ func writeInputFile(inputPath string, input dyn.Value) error { return os.WriteFile(inputPath, rootConfigJson, 0600) } -func loadOutputFile(rootPath string, outputPath string) (dyn.Value, error) { +func loadOutputFile(rootPath string, outputPath string) (dyn.Value, diag.Diagnostics) { outputFile, err := os.Open(outputPath) if err != nil { - return dyn.InvalidValue, fmt.Errorf("failed to open output file: %w", err) + return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to open output file: %w", err)) } defer outputFile.Close() @@ -243,27 +241,34 @@ func loadOutputFile(rootPath string, outputPath string) (dyn.Value, error) { // for that, we pass virtualPath instead of outputPath as file location virtualPath, err := filepath.Abs(filepath.Join(rootPath, "__generated_by_pydabs__.yml")) if err != nil { - return dyn.InvalidValue, fmt.Errorf("failed to get absolute path: %w", err) + return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to get absolute path: %w", err)) } generated, err := yamlloader.LoadYAML(virtualPath, outputFile) if err != nil { - return dyn.InvalidValue, fmt.Errorf("failed to parse output file: %w", err) + return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to parse output file: %w", err)) } - normalized, diagnostic := convert.Normalize(config.Root{}, generated) - if diagnostic.Error() != nil { - return dyn.InvalidValue, fmt.Errorf("failed to normalize output: %w", diagnostic.Error()) - } + return strictNormalize(config.Root{}, generated) +} + +func strictNormalize(dst any, generated dyn.Value) (dyn.Value, diag.Diagnostics) { + normalized, diags := convert.Normalize(dst, generated) // warnings shouldn't happen because output should be already normalized // when it happens, it's a bug in the mutator, and should be treated as an error - for _, d := range diagnostic.Filter(diag.Warning) { - return dyn.InvalidValue, fmt.Errorf("failed to normalize output: %s", d.Summary) + strictDiags := diag.Diagnostics{} + + for _, d := range diags { + if d.Severity == diag.Warning { + d.Severity = diag.Error + } + + strictDiags = strictDiags.Append(d) } - return normalized, nil + return normalized, strictDiags } // loadDiagnosticsFile loads diagnostics from a file. diff --git a/bundle/config/mutator/python/python_mutator_test.go b/bundle/config/mutator/python/python_mutator_test.go index ea02d1ced..26350d886 100644 --- a/bundle/config/mutator/python/python_mutator_test.go +++ b/bundle/config/mutator/python/python_mutator_test.go @@ -10,6 +10,8 @@ import ( "runtime" "testing" + "github.com/databricks/cli/libs/dyn/convert" + "github.com/databricks/cli/libs/dyn/merge" "github.com/databricks/cli/bundle/env" @@ -255,7 +257,7 @@ func TestPythonMutator_badOutput(t *testing.T) { mutator := PythonMutator(PythonMutatorPhaseLoad) diag := bundle.Apply(ctx, b, mutator) - assert.EqualError(t, diag.Error(), "failed to load Python mutator output: failed to normalize output: unknown field: unknown_property") + assert.EqualError(t, diag.Error(), "unknown field: unknown_property") } func TestPythonMutator_disabled(t *testing.T) { @@ -546,6 +548,22 @@ func TestInterpreterPath(t *testing.T) { } } +func TestStrictNormalize(t *testing.T) { + // NB: there is no way to trigger diag.Error, so we don't test it + + type TestStruct struct { + A int `json:"a"` + } + + value := dyn.NewValue(map[string]dyn.Value{"A": dyn.NewValue("abc", nil)}, nil) + + _, diags := convert.Normalize(TestStruct{}, value) + _, strictDiags := strictNormalize(TestStruct{}, value) + + assert.False(t, diags.HasError()) + assert.True(t, strictDiags.HasError()) +} + func withProcessStub(t *testing.T, args []string, output string, diagnostics string) context.Context { ctx := context.Background() ctx, stub := process.WithStub(ctx) From 5d9910c8e0f3119276b6ee4afd534ac82d4fb6c2 Mon Sep 17 00:00:00 2001 From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com> Date: Mon, 2 Sep 2024 14:09:08 +0530 Subject: [PATCH 10/15] Make lock optional in the JSON schema (#1738) Fixes https://github.com/databricks/cli/issues/1561 --- bundle/config/deployment.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bundle/config/deployment.go b/bundle/config/deployment.go index 7f0f57a8c..b7efb4456 100644 --- a/bundle/config/deployment.go +++ b/bundle/config/deployment.go @@ -6,5 +6,5 @@ type Deployment struct { FailOnActiveRuns bool `json:"fail_on_active_runs,omitempty"` // Lock configures locking behavior on deployment. - Lock Lock `json:"lock"` + Lock Lock `json:"lock,omitempty"` } From 582558cac22017c2265b384c5380179975dc6f3d Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Mon, 2 Sep 2024 11:17:18 +0200 Subject: [PATCH 11/15] Do not suppress normalisation diagnostics for resolving variables (#1740) ## Changes Tested on the following bundle configuration ``` bundle: name: clusters mode: development variables: webhook_notifications: description: Webhook URL for notifications type: complex default: on_failure: id: 6a6c04c1-389c-4534-95af-b68b62a9dbe6 resources: jobs: test_job: name: "Andrew Nester Test Job" tasks: - task_key: test_task notebook_task: notebook_path: "./src/test.py" new_cluster: num_workers: 2 node_type_id: "i3.xlarge" autoscale: min_workers: 2 max_workers: 7 spark_version: "12.2.x-scala2.12" spark_conf: "spark.executor.memory": "2g" webhook_notifications: ${var.webhook_notifications} ``` bundle validate output is below ``` andrew.nester@HFW9Y94129 wheel % databricks bundle validate Warning: expected sequence, found map at resources.jobs.test_job.webhook_notifications.on_failure in bundle.yml:11:9 Name: clusters Target: default Workspace: User: andrew.nester@databricks.com Path: /Users/andrew.nester@databricks.com/.bundle/clusters/default ``` **Note** that error correctly points to the variable --- .../config/mutator/resolve_variable_references.go | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/bundle/config/mutator/resolve_variable_references.go b/bundle/config/mutator/resolve_variable_references.go index 61940be56..5e5b76109 100644 --- a/bundle/config/mutator/resolve_variable_references.go +++ b/bundle/config/mutator/resolve_variable_references.go @@ -10,7 +10,6 @@ import ( "github.com/databricks/cli/libs/dyn" "github.com/databricks/cli/libs/dyn/convert" "github.com/databricks/cli/libs/dyn/dynvar" - "github.com/databricks/cli/libs/log" ) type resolveVariableReferences struct { @@ -124,6 +123,7 @@ func (m *resolveVariableReferences) Apply(ctx context.Context, b *bundle.Bundle) // We rewrite it here to make the resolution logic simpler. varPath := dyn.NewPath(dyn.Key("var")) + var diags diag.Diagnostics err := b.Config.Mutate(func(root dyn.Value) (dyn.Value, error) { // Synthesize a copy of the root that has all fields that are present in the type // but not set in the dynamic value set to their corresponding empty value. @@ -180,14 +180,13 @@ func (m *resolveVariableReferences) Apply(ctx context.Context, b *bundle.Bundle) // Normalize the result because variable resolution may have been applied to non-string fields. // For example, a variable reference may have been resolved to a integer. - root, diags := convert.Normalize(b.Config, root) - for _, diag := range diags { - // This occurs when a variable's resolved value is incompatible with the field's type. - // Log a warning until we have a better way to surface these diagnostics to the user. - log.Warnf(ctx, "normalization diagnostic: %s", diag.Summary) - } + root, normaliseDiags := convert.Normalize(b.Config, root) + diags = diags.Extend(normaliseDiags) return root, nil }) - return diag.FromErr(err) + if err != nil { + diags = diags.Extend(diag.FromErr(err)) + } + return diags } From ed448815b4e9881494d3ef561ec84ef694ea4b11 Mon Sep 17 00:00:00 2001 From: Gleb Kanterov Date: Mon, 2 Sep 2024 11:49:30 +0200 Subject: [PATCH 12/15] PythonMutator: explain missing package error (#1736) ## Changes Explain the error when the `databricks-pydabs` package is not installed or the Python environment isn't correctly activated. Example output: ``` Error: python mutator process failed: ".venv/bin/python3 -m databricks.bundles.build --phase load --input .../input.json --output .../output.json --diagnostics .../diagnostics.json: exit status 1", use --debug to enable logging .../.venv/bin/python3: Error while finding module specification for 'databricks.bundles.build' (ModuleNotFoundError: No module named 'databricks') Explanation: 'databricks-pydabs' library is not installed in the Python environment. If using Python wheels, ensure that 'databricks-pydabs' is included in the dependencies, and that the wheel is installed in the Python environment: $ .venv/bin/pip install -e . If using a virtual environment, ensure it is specified as the venv_path property in databricks.yml, or activate the environment before running CLI commands: experimental: pydabs: venv_path: .venv ``` ## Tests Unit tests --- .../config/mutator/python/python_mutator.go | 49 +++++++++++++++++-- .../mutator/python/python_mutator_test.go | 24 +++++++++ 2 files changed, 70 insertions(+), 3 deletions(-) diff --git a/bundle/config/mutator/python/python_mutator.go b/bundle/config/mutator/python/python_mutator.go index bf30f0704..fbf3b7e0b 100644 --- a/bundle/config/mutator/python/python_mutator.go +++ b/bundle/config/mutator/python/python_mutator.go @@ -1,15 +1,21 @@ package python import ( + "bytes" "context" "encoding/json" "errors" "fmt" + "io" "os" "path/filepath" - "github.com/databricks/cli/libs/python" "github.com/databricks/databricks-sdk-go/logger" + "github.com/fatih/color" + + "strings" + + "github.com/databricks/cli/libs/python" "github.com/databricks/cli/bundle/env" @@ -169,7 +175,11 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir string, r return dyn.InvalidValue, diag.Errorf("failed to write input file: %s", err) } - stderrWriter := newLogWriter(ctx, "stderr: ") + stderrBuf := bytes.Buffer{} + stderrWriter := io.MultiWriter( + newLogWriter(ctx, "stderr: "), + &stderrBuf, + ) stdoutWriter := newLogWriter(ctx, "stdout: ") _, processErr := process.Background( @@ -197,7 +207,13 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir string, r // process can fail without reporting errors in diagnostics file or creating it, for instance, // venv doesn't have PyDABs library installed if processErr != nil { - return dyn.InvalidValue, diag.Errorf("python mutator process failed: %sw, use --debug to enable logging", processErr) + diagnostic := diag.Diagnostic{ + Severity: diag.Error, + Summary: fmt.Sprintf("python mutator process failed: %q, use --debug to enable logging", processErr), + Detail: explainProcessErr(stderrBuf.String()), + } + + return dyn.InvalidValue, diag.Diagnostics{diagnostic} } // or we can fail to read diagnostics file, that should always be created @@ -212,6 +228,33 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir string, r return output, pythonDiagnostics } +const installExplanation = `If using Python wheels, ensure that 'databricks-pydabs' is included in the dependencies, +and that the wheel is installed in the Python environment: + + $ .venv/bin/pip install -e . + +If using a virtual environment, ensure it is specified as the venv_path property in databricks.yml, +or activate the environment before running CLI commands: + + experimental: + pydabs: + venv_path: .venv +` + +// explainProcessErr provides additional explanation for common errors. +// It's meant to be the best effort, and not all errors are covered. +// Output should be used only used for error reporting. +func explainProcessErr(stderr string) string { + // implemented in cpython/Lib/runpy.py and portable across Python 3.x, including pypy + if strings.Contains(stderr, "Error while finding module specification for 'databricks.bundles.build'") { + summary := color.CyanString("Explanation: ") + "'databricks-pydabs' library is not installed in the Python environment.\n" + + return stderr + "\n" + summary + "\n" + installExplanation + } + + return stderr +} + func writeInputFile(inputPath string, input dyn.Value) error { // we need to marshal dyn.Value instead of bundle.Config to JSON to support // non-string fields assigned with bundle variables diff --git a/bundle/config/mutator/python/python_mutator_test.go b/bundle/config/mutator/python/python_mutator_test.go index 26350d886..bf12b2499 100644 --- a/bundle/config/mutator/python/python_mutator_test.go +++ b/bundle/config/mutator/python/python_mutator_test.go @@ -564,6 +564,30 @@ func TestStrictNormalize(t *testing.T) { assert.True(t, strictDiags.HasError()) } +func TestExplainProcessErr(t *testing.T) { + stderr := "/home/test/.venv/bin/python3: Error while finding module specification for 'databricks.bundles.build' (ModuleNotFoundError: No module named 'databricks')\n" + expected := `/home/test/.venv/bin/python3: Error while finding module specification for 'databricks.bundles.build' (ModuleNotFoundError: No module named 'databricks') + +Explanation: 'databricks-pydabs' library is not installed in the Python environment. + +If using Python wheels, ensure that 'databricks-pydabs' is included in the dependencies, +and that the wheel is installed in the Python environment: + + $ .venv/bin/pip install -e . + +If using a virtual environment, ensure it is specified as the venv_path property in databricks.yml, +or activate the environment before running CLI commands: + + experimental: + pydabs: + venv_path: .venv +` + + out := explainProcessErr(stderr) + + assert.Equal(t, expected, out) +} + func withProcessStub(t *testing.T, args []string, output string, diagnostics string) context.Context { ctx := context.Background() ctx, stub := process.WithStub(ctx) From 0ce7be8ff07dcf1f57d41b620b209f73e20df118 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 2 Sep 2024 14:39:16 +0200 Subject: [PATCH 13/15] Bump github.com/Masterminds/semver/v3 from 3.2.1 to 3.3.0 (#1741) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [github.com/Masterminds/semver/v3](https://github.com/Masterminds/semver) from 3.2.1 to 3.3.0.
Release notes

Sourced from github.com/Masterminds/semver/v3's releases.

v3.3.0

What's Changed

New Contributors

Full Changelog: https://github.com/Masterminds/semver/compare/v3.2.1...v3.3.0

Changelog

Sourced from github.com/Masterminds/semver/v3's changelog.

3.3.0 (2024-08-27)

Added

Changed

  • #241: Simplify StrictNewVersion parsing (thanks @​grosser)
  • Testing support up through Go 1.23
  • Minimum version set to 1.21 as this is what's tested now
  • Fuzz testing now supports caching
Commits
  • e6e3d4d Merge pull request #249 from mattfarina/update-changelog-3.3.0
  • e80c4ea Updating changelog for 3.3.0
  • 80427ad Merge pull request #248 from mattfarina/bump-min-version
  • b610837 bumping min version in go.mod based on what's tested
  • a4cccd8 Merge pull request #246 from mattfarina/bump-go-1.23
  • 7c178cf Updating the testing version of Go used
  • 29f94c1 Merge pull request #241 from grosser/grosser/validate
  • 2cf1b16 Merge pull request #245 from mattfarina/remove-vert
  • b55476a Removing reference to vert
  • d07450b simplify StrictNewVersion
  • Additional commits viewable in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=github.com/Masterminds/semver/v3&package-manager=go_modules&previous-version=3.2.1&new-version=3.3.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 4aa279921..9777106c2 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module github.com/databricks/cli go 1.22 require ( - github.com/Masterminds/semver/v3 v3.2.1 // MIT + github.com/Masterminds/semver/v3 v3.3.0 // MIT github.com/briandowns/spinner v1.23.1 // Apache 2.0 github.com/databricks/databricks-sdk-go v0.45.0 // Apache 2.0 github.com/fatih/color v1.17.0 // MIT diff --git a/go.sum b/go.sum index 2e58948aa..b232e8e43 100644 --- a/go.sum +++ b/go.sum @@ -8,8 +8,8 @@ cloud.google.com/go/compute/metadata v0.3.0/go.mod h1:zFmK7XCadkQkj6TtorcaGlCW1h dario.cat/mergo v1.0.0 h1:AGCNq9Evsj31mOgNPcLyXc+4PNABt905YmuqPYYpBWk= dario.cat/mergo v1.0.0/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= -github.com/Masterminds/semver/v3 v3.2.1 h1:RN9w6+7QoMeJVGyfmbcgs28Br8cvmnucEXnY0rYXWg0= -github.com/Masterminds/semver/v3 v3.2.1/go.mod h1:qvl/7zhW3nngYb5+80sSMF+FG2BjYrf8m9wsX0PNOMQ= +github.com/Masterminds/semver/v3 v3.3.0 h1:B8LGeaivUe71a5qox1ICM/JLl0NqZSW5CHyL+hmvYS0= +github.com/Masterminds/semver/v3 v3.3.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM= github.com/Microsoft/go-winio v0.6.1 h1:9/kr64B9VUZrLm5YYwbGtUJnMgqWVOdUAXu6Migciow= github.com/Microsoft/go-winio v0.6.1/go.mod h1:LRdKpFKfdobln8UmuiYcKPot9D2v6svN5+sAH+4kjUM= github.com/ProtonMail/go-crypto v1.1.0-alpha.2 h1:bkyFVUP+ROOARdgCiJzNQo2V2kiB97LyUpzH9P6Hrlg= From 096123674afb6c593f2c242c3210fa5735ed98cb Mon Sep 17 00:00:00 2001 From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com> Date: Mon, 2 Sep 2024 19:13:17 +0530 Subject: [PATCH 14/15] Fix streaming of stdout, stdin, stderr in cobra test runner (#1742) ## Changes We were not using the readers and writers set in the test fixtures in the progress logger. This PR fixes that. It also modifies `TestAccAbortBind`, which was implicitly relying on the bug. I encountered this bug while working on https://github.com/databricks/cli/pull/1672. ## Tests Manually. From non-tty: ``` Error: failed to bind the resource, err: This bind operation requires user confirmation, but the current console does not support prompting. Please specify --auto-approve if you would like to skip prompts and proceed. ``` From tty, bind works as expected. ``` Confirm import changes? Changes will be remotely applied only after running 'bundle deploy'. [y/n]: y Updating deployment state... Successfully bound databricks_pipeline with an id '9d2dedbb-f522-4503-96ba-4bc4d5bfa77d'. Run 'bundle deploy' to deploy changes to your workspace ``` --- bundle/deploy/terraform/import.go | 5 +++++ cmd/root/progress_logger.go | 6 ++++++ internal/bundle/bind_resource_test.go | 8 ++++++-- 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/bundle/deploy/terraform/import.go b/bundle/deploy/terraform/import.go index 7c1a68158..dfe60a581 100644 --- a/bundle/deploy/terraform/import.go +++ b/bundle/deploy/terraform/import.go @@ -69,6 +69,11 @@ func (m *importResource) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagn // Remove output starting from Warning until end of output output = output[:bytes.Index([]byte(output), []byte("Warning:"))] cmdio.LogString(ctx, output) + + if !cmdio.IsPromptSupported(ctx) { + return diag.Errorf("This bind operation requires user confirmation, but the current console does not support prompting. Please specify --auto-approve if you would like to skip prompts and proceed.") + } + ans, err := cmdio.AskYesOrNo(ctx, "Confirm import changes? Changes will be remotely applied only after running 'bundle deploy'.") if err != nil { return diag.FromErr(err) diff --git a/cmd/root/progress_logger.go b/cmd/root/progress_logger.go index c05ecb043..7d6a1fa46 100644 --- a/cmd/root/progress_logger.go +++ b/cmd/root/progress_logger.go @@ -29,6 +29,12 @@ func (f *progressLoggerFlag) resolveModeDefault(format flags.ProgressLogFormat) } func (f *progressLoggerFlag) initializeContext(ctx context.Context) (context.Context, error) { + // No need to initialize the logger if it's already set in the context. This + // happens in unit tests where the logger is setup as a fixture. + if _, ok := cmdio.FromContext(ctx); ok { + return ctx, nil + } + if f.log.level.String() != "disabled" && f.log.file.String() == "stderr" && f.ProgressLogFormat == flags.ModeInplace { return nil, fmt.Errorf("inplace progress logging cannot be used when log-file is stderr") diff --git a/internal/bundle/bind_resource_test.go b/internal/bundle/bind_resource_test.go index d44ad2c31..2449c31f2 100644 --- a/internal/bundle/bind_resource_test.go +++ b/internal/bundle/bind_resource_test.go @@ -11,6 +11,7 @@ import ( "github.com/databricks/databricks-sdk-go" "github.com/databricks/databricks-sdk-go/service/jobs" "github.com/google/uuid" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -101,12 +102,15 @@ func TestAccAbortBind(t *testing.T) { destroyBundle(t, ctx, bundleRoot) }) + // Bind should fail because prompting is not possible. t.Setenv("BUNDLE_ROOT", bundleRoot) + t.Setenv("TERM", "dumb") c := internal.NewCobraTestRunner(t, "bundle", "deployment", "bind", "foo", fmt.Sprint(jobId)) - // Simulate user aborting the bind. This is done by not providing any input to the prompt in non-interactive mode. + // Expect error suggesting to use --auto-approve _, _, err = c.Run() - require.ErrorContains(t, err, "failed to bind the resource") + assert.ErrorContains(t, err, "failed to bind the resource") + assert.ErrorContains(t, err, "This bind operation requires user confirmation, but the current console does not support prompting. Please specify --auto-approve if you would like to skip prompts and proceed") err = deployBundle(t, ctx, bundleRoot) require.NoError(t, err) From 072fa812e28c32d374afdb5409e7320f1c5894c7 Mon Sep 17 00:00:00 2001 From: "Lennart Kats (databricks)" Date: Tue, 3 Sep 2024 09:51:54 +0200 Subject: [PATCH 15/15] Include a permissions section in all templates (#1713) ## Changes This updates the templates to include a `permissions` section. Having a permissions section is a best practice, is helpful to understand the notion of permissions, and helps diagnose permission errors (https://github.com/databricks/cli/pull/1386). This is a cherry-pick from https://github.com/databricks/cli/pull/1387. This change was verified to work both in dev and prod. Existing unit tests validate the validity of the templates in these modes. --- .../{{.project_name}}/databricks.yml.tmpl | 17 ++++---- .../{{.project_name}}/databricks.yml.tmpl | 36 ++++------------- .../{{.project_name}}/databricks.yml.tmpl | 39 ++++++------------- 3 files changed, 28 insertions(+), 64 deletions(-) diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl b/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl index fdda03c0d..f96ce4fe6 100644 --- a/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl @@ -12,8 +12,10 @@ include: targets: dev: default: true - # We use 'mode: development' to indicate this is a personal development copy. - # Any job schedules and triggers are paused by default. + # The default target uses 'mode: development' to create a development copy. + # - Deployed resources get prefixed with '[dev my_user_name]' + # - Any job schedules and triggers are paused by default. + # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. mode: development workspace: host: {{workspace_host}} @@ -22,11 +24,10 @@ targets: mode: production workspace: host: {{workspace_host}} - # We always use /Users/{{user_name}} for all resources to make sure we only have a single copy. + # We explicitly specify /Users/{{user_name}} to make sure we only have a single copy. root_path: /Users/{{user_name}}/.bundle/${bundle.name}/${bundle.target} - {{- if not is_service_principal}} + permissions: + - {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} + level: CAN_MANAGE run_as: - # This runs as {{user_name}} in production. We could also use a service principal here - # using service_principal_name (see the Databricks documentation). - user_name: {{user_name}} - {{- end}} + {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} diff --git a/libs/template/templates/default-python/template/{{.project_name}}/databricks.yml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/databricks.yml.tmpl index e3572326b..8544dc834 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/databricks.yml.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/databricks.yml.tmpl @@ -7,44 +7,24 @@ include: - resources/*.yml targets: - # The 'dev' target, for development purposes. This target is the default. dev: - # We use 'mode: development' to indicate this is a personal development copy: + # The default target uses 'mode: development' to create a development copy. # - Deployed resources get prefixed with '[dev my_user_name]' - # - Any job schedules and triggers are paused by default - # - The 'development' mode is used for Delta Live Tables pipelines + # - Any job schedules and triggers are paused by default. + # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. mode: development default: true workspace: host: {{workspace_host}} - ## Optionally, there could be a 'staging' target here. - ## (See Databricks docs on CI/CD at https://docs.databricks.com/dev-tools/bundles/ci-cd.html.) - # - # staging: - # workspace: - # host: {{workspace_host}} - - # The 'prod' target, used for production deployment. prod: - # We use 'mode: production' to indicate this is a production deployment. - # Doing so enables strict verification of the settings below. mode: production workspace: host: {{workspace_host}} - # We always use /Users/{{user_name}} for all resources to make sure we only have a single copy. - {{- /* - Internal note 2023-12: CLI versions v0.211.0 and before would show an error when using `mode: production` - with a path that doesn't say "/Shared". For now, we'll include an extra comment in the template - to explain that customers should update if they see this. - */}} - # If this path results in an error, please make sure you have a recent version of the CLI installed. + # We explicitly specify /Users/{{user_name}} to make sure we only have a single copy. root_path: /Users/{{user_name}}/.bundle/${bundle.name}/${bundle.target} + permissions: + - {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} + level: CAN_MANAGE run_as: - {{- if is_service_principal}} - service_principal_name: {{user_name}} - {{- else}} - # This runs as {{user_name}} in production. We could also use a service principal here, - # see https://docs.databricks.com/dev-tools/bundles/permissions.html. - user_name: {{user_name}} - {{- end}} + {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} diff --git a/libs/template/templates/default-sql/template/{{.project_name}}/databricks.yml.tmpl b/libs/template/templates/default-sql/template/{{.project_name}}/databricks.yml.tmpl index a47fb7c19..55c1aae4a 100644 --- a/libs/template/templates/default-sql/template/{{.project_name}}/databricks.yml.tmpl +++ b/libs/template/templates/default-sql/template/{{.project_name}}/databricks.yml.tmpl @@ -18,16 +18,16 @@ variables: {{- $dev_schema := .shared_schema }} {{- $prod_schema := .shared_schema }} {{- if (regexp "^yes").MatchString .personal_schemas}} -{{- $dev_schema = "${workspace.current_user.short_name}"}} -{{- $prod_schema = "default"}} + {{- $dev_schema = "${workspace.current_user.short_name}"}} + {{- $prod_schema = "default"}} {{- end}} -# Deployment targets. targets: - # The 'dev' target, for development purposes. This target is the default. dev: - # We use 'mode: development' to indicate this is a personal development copy. - # Any job schedules and triggers are paused by default + # The default target uses 'mode: development' to create a development copy. + # - Deployed resources get prefixed with '[dev my_user_name]' + # - Any job schedules and triggers are paused by default. + # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. mode: development default: true workspace: @@ -37,35 +37,18 @@ targets: catalog: {{.default_catalog}} schema: {{$dev_schema}} - ## Optionally, there could be a 'staging' target here. - ## (See Databricks docs on CI/CD at https://docs.databricks.com/dev-tools/bundles/ci-cd.html.) - # - # staging: - # workspace: - # host: {{workspace_host}} - - # The 'prod' target, used for production deployment. prod: - # We use 'mode: production' to indicate this is a production deployment. - # Doing so enables strict verification of the settings below. mode: production workspace: host: {{workspace_host}} - # We always use /Users/{{user_name}} for all resources to make sure we only have a single copy. - {{- /* - Internal note 2023-12: CLI versions v0.211.0 and before would show an error when using `mode: production` - with a path that doesn't say "/Shared". For now, we'll include an extra comment in the template - to explain that customers should update if they see this. - */}} - # If this path results in an error, please make sure you have a recent version of the CLI installed. + # We explicitly specify /Users/{{user_name}} to make sure we only have a single copy. root_path: /Users/{{user_name}}/.bundle/${bundle.name}/${bundle.target} variables: warehouse_id: {{index ((regexp "[^/]+$").FindStringSubmatch .http_path) 0}} catalog: {{.default_catalog}} schema: {{$prod_schema}} - {{- if not is_service_principal}} + permissions: + - {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} + level: CAN_MANAGE run_as: - # This runs as {{user_name}} in production. We could also use a service principal here - # using service_principal_name (see https://docs.databricks.com/en/dev-tools/bundles/permissions.html). - user_name: {{user_name}} - {{end -}} + {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}}