databricks-cli/libs/databrickscfg/cfgpickers/clusters_test.go

package cfgpickers

import (
	"context"
	"testing"

	"github.com/databricks/cli/libs/cmdio"
	"github.com/databricks/databricks-sdk-go"
	"github.com/databricks/databricks-sdk-go/qa"
	"github.com/databricks/databricks-sdk-go/service/compute"
	"github.com/databricks/databricks-sdk-go/service/iam"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)
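
// TestIsCompatible covers IsCompatibleWithUC: a cluster is accepted only with a
// UC-capable data security mode and a sufficiently new Spark version; the cases
// with DataSecurityModeNone, older runtimes, and a custom image string are all
// expected to be rejected.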
func TestIsCompatible(t *testing.T) {
	require.True(t, IsCompatibleWithUC(compute.ClusterDetails{
		SparkVersion:     "13.2.x-aarch64-scala2.12",
		DataSecurityMode: compute.DataSecurityModeUserIsolation,
	}, "13.0"))
	require.False(t, IsCompatibleWithUC(compute.ClusterDetails{
		SparkVersion:     "13.2.x-aarch64-scala2.12",
		DataSecurityMode: compute.DataSecurityModeNone,
	}, "13.0"))
	require.False(t, IsCompatibleWithUC(compute.ClusterDetails{
		SparkVersion:     "9.1.x-photon-scala2.12",
		DataSecurityMode: compute.DataSecurityModeNone,
	}, "13.0"))
	require.False(t, IsCompatibleWithUC(compute.ClusterDetails{
		SparkVersion:     "9.1.x-photon-scala2.12",
		DataSecurityMode: compute.DataSecurityModeNone,
	}, "10.0"))
	require.False(t, IsCompatibleWithUC(compute.ClusterDetails{
		SparkVersion:     "custom-9.1.x-photon-scala2.12",
		DataSecurityMode: compute.DataSecurityModeNone,
	}, "14.0"))
}
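
// TestIsCompatibleWithSnapshots verifies that snapshot runtime versions
// (for example "14.x-snapshot-...") are still recognized as compatible with the
// requested Databricks Connect version.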
func TestIsCompatibleWithSnapshots(t *testing.T) {
	require.True(t, IsCompatibleWithUC(compute.ClusterDetails{
		SparkVersion:     "14.x-snapshot-cpu-ml-scala2.12",
		DataSecurityMode: compute.DataSecurityModeUserIsolation,
	}, "14.0"))
}
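
// TestWithoutSystemClusters verifies that the filter returned by
// WithoutSystemClusters keeps clusters created through the UI or API and drops
// job, pipeline, and unrecognized cluster sources.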
func TestWithoutSystemClusters(t *testing.T) {
	fn := WithoutSystemClusters()

	// Sources to exclude.
	for _, v := range []string{
		"JOB",
		"PIPELINE",
		"SOME_UNKNOWN_VALUE",
	} {
		assert.False(t, fn(&compute.ClusterDetails{ClusterSource: compute.ClusterSource(v)}, nil))
	}

	// Sources to include.
	for _, v := range []string{
		"UI",
		"API",
	} {
		assert.True(t, fn(&compute.ClusterDetails{ClusterSource: compute.ClusterSource(v)}, nil))
	}
}
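
// TestFirstCompatibleCluster verifies that AskForCluster skips the shared
// cluster on Spark 12.2 (below the requested Databricks Connect 13.1) and
// selects the current user's single-user cluster running 14.5.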
func TestFirstCompatibleCluster(t *testing.T) {
	cfg, server := qa.HTTPFixtures{
		{
			Method:   "GET",
			Resource: "/api/2.1/clusters/list?filter_by.cluster_sources=API&filter_by.cluster_sources=UI&page_size=100",
			Response: compute.ListClustersResponse{
				Clusters: []compute.ClusterDetails{
					{
						ClusterId:        "abc-id",
						ClusterName:      "first shared",
						DataSecurityMode: compute.DataSecurityModeUserIsolation,
						SparkVersion:     "12.2.x-whatever",
						State:            compute.StateRunning,
					},
					{
						ClusterId:        "bcd-id",
						ClusterName:      "second personal",
						DataSecurityMode: compute.DataSecurityModeSingleUser,
						SparkVersion:     "14.5.x-whatever",
						State:            compute.StateRunning,
						SingleUserName:   "serge",
					},
				},
			},
		},
		{
			Method:   "GET",
			Resource: "/api/2.0/preview/scim/v2/Me",
			Response: iam.User{
				UserName: "serge",
			},
		},
		{
			Method:   "GET",
			Resource: "/api/2.1/clusters/spark-versions",
			Response: compute.GetSparkVersionsResponse{
				Versions: []compute.SparkVersion{
					{
						Key:  "14.5.x-whatever",
						Name: "14.5 (Awesome)",
					},
				},
			},
		},
	}.Config(t)
	defer server.Close()
	w := databricks.Must(databricks.NewWorkspaceClient((*databricks.Config)(cfg)))
	ctx := cmdio.MockDiscard(context.Background())
	clusterID, err := AskForCluster(ctx, w, WithDatabricksConnect("13.1"))
	require.NoError(t, err)
	require.Equal(t, "bcd-id", clusterID)
}
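
// TestNoCompatibleClusters verifies that AskForCluster returns
// ErrNoCompatibleClusters when the only available cluster runs a Spark version
// older than the requested Databricks Connect version.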
func TestNoCompatibleClusters(t *testing.T) {
	cfg, server := qa.HTTPFixtures{
		{
			Method:   "GET",
			Resource: "/api/2.1/clusters/list?filter_by.cluster_sources=API&filter_by.cluster_sources=UI&page_size=100",
			Response: compute.ListClustersResponse{
				Clusters: []compute.ClusterDetails{
					{
						ClusterId:        "abc-id",
						ClusterName:      "first shared",
						DataSecurityMode: compute.DataSecurityModeUserIsolation,
						SparkVersion:     "12.2.x-whatever",
						State:            compute.StateRunning,
					},
				},
			},
		},
		{
			Method:   "GET",
			Resource: "/api/2.0/preview/scim/v2/Me",
			Response: iam.User{
				UserName: "serge",
			},
		},
		{
			Method:   "GET",
			Resource: "/api/2.1/clusters/spark-versions",
			Response: compute.GetSparkVersionsResponse{
				Versions: []compute.SparkVersion{
					{
						Key:  "14.5.x-whatever",
						Name: "14.5 (Awesome)",
					},
				},
			},
		},
	}.Config(t)
	defer server.Close()
	w := databricks.Must(databricks.NewWorkspaceClient((*databricks.Config)(cfg)))
	ctx := cmdio.MockDiscard(context.Background())
	_, err := AskForCluster(ctx, w, WithDatabricksConnect("13.1"))
	require.Equal(t, ErrNoCompatibleClusters, err)
}