Merge df0a98066a into 72dde793d8

2024-11-18 23:50:24 +05:30 · 2024-11-18 23:50:24 +05:30 · 5d1c160822
parent 72dde793d8 df0a98066a
commit 5d1c160822
3 changed files with 645 additions and 0 deletions
--- a/bundle/config/validate/single_node_cluster.go
+++ b/bundle/config/validate/single_node_cluster.go
@ -0,0 +1,174 @@
+package validate
+
+import (
+	"context"
+	"fmt"
+	"strings"
+
+	"github.com/databricks/cli/bundle"
+	"github.com/databricks/cli/libs/diag"
+	"github.com/databricks/cli/libs/dyn"
+	"github.com/databricks/databricks-sdk-go/service/compute"
+	"github.com/databricks/databricks-sdk-go/service/pipelines"
+)
+
+// Validates that any single node clusters defined in the bundle are correctly configured.
+func SingleNodeCluster() bundle.ReadOnlyMutator {
+	return &singleNodeCluster{}
+}
+
+type singleNodeCluster struct{}
+
+func (m *singleNodeCluster) Name() string {
+	return "validate:SingleNodeCluster"
+}
+
+const singleNodeWarningDetail = `num_workers should be 0 only for single-node clusters. To create a
+valid single node cluster please ensure that the following properties
+are correctly set in the cluster specification:
+
+  spark_conf:
+    spark.databricks.cluster.profile: singleNode
+    spark.master: local[*]
+
+  custom_tags:
+    ResourceClass: SingleNode
+  `
+
+const singleNodeWarningSummary = `Single node cluster is not correctly configured`
+
+func validateSingleNodeCluster(spec *compute.ClusterSpec, l []dyn.Location, p dyn.Path) *diag.Diagnostic {
+	if spec == nil {
+		return nil
+	}
+
+	if spec.NumWorkers > 0 || spec.Autoscale != nil {
+		return nil
+	}
+
+	if spec.PolicyId != "" {
+		return nil
+	}
+
+	invalidSingleNodeWarning := &diag.Diagnostic{
+		Severity:  diag.Warning,
+		Summary:   singleNodeWarningSummary,
+		Detail:    singleNodeWarningDetail,
+		Locations: l,
+		Paths:     []dyn.Path{p},
+	}
+	profile, ok := spec.SparkConf["spark.databricks.cluster.profile"]
+	if !ok {
+		return invalidSingleNodeWarning
+	}
+	master, ok := spec.SparkConf["spark.master"]
+	if !ok {
+		return invalidSingleNodeWarning
+	}
+	resourceClass, ok := spec.CustomTags["ResourceClass"]
+	if !ok {
+		return invalidSingleNodeWarning
+	}
+
+	if profile == "singleNode" && strings.HasPrefix(master, "local") && resourceClass == "SingleNode" {
+		return nil
+	}
+
+	return invalidSingleNodeWarning
+}
+
+func validateSingleNodePipelineCluster(spec pipelines.PipelineCluster, l []dyn.Location, p dyn.Path) *diag.Diagnostic {
+	if spec.NumWorkers > 0 || spec.Autoscale != nil {
+		return nil
+	}
+
+	if spec.PolicyId != "" {
+		return nil
+	}
+
+	invalidSingleNodeWarning := &diag.Diagnostic{
+		Severity:  diag.Warning,
+		Summary:   singleNodeWarningSummary,
+		Detail:    singleNodeWarningDetail,
+		Locations: l,
+		Paths:     []dyn.Path{p},
+	}
+	profile, ok := spec.SparkConf["spark.databricks.cluster.profile"]
+	if !ok {
+		return invalidSingleNodeWarning
+	}
+	master, ok := spec.SparkConf["spark.master"]
+	if !ok {
+		return invalidSingleNodeWarning
+	}
+	resourceClass, ok := spec.CustomTags["ResourceClass"]
+	if !ok {
+		return invalidSingleNodeWarning
+	}
+
+	if profile == "singleNode" && strings.HasPrefix(master, "local") && resourceClass == "SingleNode" {
+		return nil
+	}
+
+	return invalidSingleNodeWarning
+}
+
+func (m *singleNodeCluster) Apply(ctx context.Context, rb bundle.ReadOnlyBundle) diag.Diagnostics {
+	diags := diag.Diagnostics{}
+
+	// Interactive clusters
+	for k, r := range rb.Config().Resources.Clusters {
+		p := dyn.NewPath(dyn.Key("resources"), dyn.Key("clusters"), dyn.Key(k))
+		l := rb.Config().GetLocations("resources.clusters." + k)
+
+		d := validateSingleNodeCluster(r.ClusterSpec, l, p)
+		if d != nil {
+			diags = append(diags, *d)
+		}
+	}
+
+	// Job clusters
+	for jobK, jobV := range rb.Config().Resources.Jobs {
+		for i, clusterV := range jobV.JobSettings.JobClusters {
+			p := dyn.NewPath(dyn.Key("resources"), dyn.Key("jobs"), dyn.Key(jobK), dyn.Key("job_clusters"), dyn.Index(i))
+			l := rb.Config().GetLocations(fmt.Sprintf("resources.jobs.%s.job_clusters[%d]", jobK, i))
+
+			d := validateSingleNodeCluster(&clusterV.NewCluster, l, p)
+			if d != nil {
+				diags = append(diags, *d)
+			}
+		}
+	}
+
+	// Job task clusters
+	for jobK, jobV := range rb.Config().Resources.Jobs {
+		for i, taskV := range jobV.JobSettings.Tasks {
+			if taskV.NewCluster == nil {
+				continue
+			}
+
+			p := dyn.NewPath(dyn.Key("resources"), dyn.Key("jobs"), dyn.Key(jobK), dyn.Key("tasks"), dyn.Index(i), dyn.Key("new_cluster"))
+			l := rb.Config().GetLocations(fmt.Sprintf("resources.jobs.%s.tasks[%d].new_cluster", jobK, i))
+
+			d := validateSingleNodeCluster(taskV.NewCluster, l, p)
+			if d != nil {
+				diags = append(diags, *d)
+			}
+		}
+	}
+
+	// Pipeline clusters
+	for pipelineK, pipelineV := range rb.Config().Resources.Pipelines {
+		for i, clusterV := range pipelineV.PipelineSpec.Clusters {
+			p := dyn.NewPath(dyn.Key("resources"), dyn.Key("pipelines"), dyn.Key(pipelineK), dyn.Key("clusters"), dyn.Index(i))
+			l := rb.Config().GetLocations(fmt.Sprintf("resources.pipelines.%s.clusters[%d]", pipelineK, i))
+
+			d := validateSingleNodePipelineCluster(clusterV, l, p)
+			if d != nil {
+				diags = append(diags, *d)
+			}
+		}
+	}
+
+	return diags
+}
--- a/bundle/config/validate/single_node_cluster_test.go
+++ b/bundle/config/validate/single_node_cluster_test.go
@ -0,0 +1,470 @@
+package validate
+
+import (
+	"context"
+	"testing"
+
+	"github.com/databricks/cli/bundle"
+	"github.com/databricks/cli/bundle/config"
+	"github.com/databricks/cli/bundle/config/resources"
+	"github.com/databricks/cli/bundle/internal/bundletest"
+	"github.com/databricks/cli/libs/diag"
+	"github.com/databricks/cli/libs/dyn"
+	"github.com/databricks/databricks-sdk-go/service/compute"
+	"github.com/databricks/databricks-sdk-go/service/jobs"
+	"github.com/databricks/databricks-sdk-go/service/pipelines"
+	"github.com/stretchr/testify/assert"
+)
+
+func TestValidateSingleNodeClusterFail(t *testing.T) {
+	failCases := []struct {
+		name string
+		spec *compute.ClusterSpec
+	}{
+		{
+			name: "no tags or conf",
+			spec: &compute.ClusterSpec{
+				ClusterName: "foo",
+			},
+		},
+		{
+			name: "no tags",
+			spec: &compute.ClusterSpec{
+				SparkConf: map[string]string{
+					"spark.databricks.cluster.profile": "singleNode",
+					"spark.master":                     "local[*]",
+				},
+			},
+		},
+		{
+			name: "no conf",
+			spec: &compute.ClusterSpec{
+				CustomTags: map[string]string{
+					"ResourceClass": "SingleNode",
+				},
+			},
+		},
+		{
+			name: "invalid spark cluster profile",
+			spec: &compute.ClusterSpec{
+				SparkConf: map[string]string{
+					"spark.databricks.cluster.profile": "invalid",
+					"spark.master":                     "local[*]",
+				},
+				CustomTags: map[string]string{
+					"ResourceClass": "SingleNode",
+				},
+			},
+		},
+		{
+			name: "invalid spark.master",
+			spec: &compute.ClusterSpec{
+				SparkConf: map[string]string{
+					"spark.databricks.cluster.profile": "singleNode",
+					"spark.master":                     "invalid",
+				},
+				CustomTags: map[string]string{
+					"ResourceClass": "SingleNode",
+				},
+			},
+		},
+		{
+			name: "invalid tags",
+			spec: &compute.ClusterSpec{
+				SparkConf: map[string]string{
+					"spark.databricks.cluster.profile": "singleNode",
+					"spark.master":                     "local[*]",
+				},
+				CustomTags: map[string]string{
+					"ResourceClass": "invalid",
+				},
+			},
+		},
+	}
+
+	ctx := context.Background()
+
+	// Test interactive clusters.
+	for _, tc := range failCases {
+		t.Run("interactive_"+tc.name, func(t *testing.T) {
+			b := &bundle.Bundle{
+				Config: config.Root{
+					Resources: config.Resources{
+						Clusters: map[string]*resources.Cluster{
+							"foo": {
+								ClusterSpec: tc.spec,
+							},
+						},
+					},
+				},
+			}
+
+			bundletest.SetLocation(b, "resources.clusters.foo", []dyn.Location{{File: "a.yml", Line: 1, Column: 1}})
+
+			diags := bundle.ApplyReadOnly(ctx, bundle.ReadOnly(b), SingleNodeCluster())
+			assert.Equal(t, diag.Diagnostics{
+				{
+					Severity:  diag.Warning,
+					Summary:   singleNodeWarningSummary,
+					Detail:    singleNodeWarningDetail,
+					Locations: []dyn.Location{{File: "a.yml", Line: 1, Column: 1}},
+					Paths:     []dyn.Path{dyn.NewPath(dyn.Key("resources"), dyn.Key("clusters"), dyn.Key("foo"))},
+				},
+			}, diags)
+		})
+	}
+
+	// Test new job clusters.
+	for _, tc := range failCases {
+		t.Run("job_"+tc.name, func(t *testing.T) {
+			b := &bundle.Bundle{
+				Config: config.Root{
+					Resources: config.Resources{
+						Jobs: map[string]*resources.Job{
+							"foo": {
+								JobSettings: &jobs.JobSettings{
+									JobClusters: []jobs.JobCluster{
+										{
+											NewCluster: *tc.spec,
+										},
+									},
+								},
+							},
+						},
+					},
+				},
+			}
+
+			bundletest.SetLocation(b, "resources.jobs.foo.job_clusters[0]", []dyn.Location{{File: "b.yml", Line: 1, Column: 1}})
+
+			diags := bundle.ApplyReadOnly(ctx, bundle.ReadOnly(b), SingleNodeCluster())
+			assert.Equal(t, diag.Diagnostics{
+				{
+					Severity:  diag.Warning,
+					Summary:   singleNodeWarningSummary,
+					Detail:    singleNodeWarningDetail,
+					Locations: []dyn.Location{{File: "b.yml", Line: 1, Column: 1}},
+					Paths:     []dyn.Path{dyn.MustPathFromString("resources.jobs.foo.job_clusters[0]")},
+				},
+			}, diags)
+
+		})
+	}
+
+	// Test job task clusters.
+	for _, tc := range failCases {
+		t.Run("task_"+tc.name, func(t *testing.T) {
+			b := &bundle.Bundle{
+				Config: config.Root{
+					Resources: config.Resources{
+						Jobs: map[string]*resources.Job{
+							"foo": {
+								JobSettings: &jobs.JobSettings{
+									Tasks: []jobs.Task{
+										{
+											NewCluster: tc.spec,
+										},
+									},
+								},
+							},
+						},
+					},
+				},
+			}
+
+			bundletest.SetLocation(b, "resources.jobs.foo.tasks[0]", []dyn.Location{{File: "c.yml", Line: 1, Column: 1}})
+
+			diags := bundle.ApplyReadOnly(ctx, bundle.ReadOnly(b), SingleNodeCluster())
+			assert.Equal(t, diag.Diagnostics{
+				{
+					Severity:  diag.Warning,
+					Summary:   singleNodeWarningSummary,
+					Detail:    singleNodeWarningDetail,
+					Locations: []dyn.Location{{File: "c.yml", Line: 1, Column: 1}},
+					Paths:     []dyn.Path{dyn.MustPathFromString("resources.jobs.foo.tasks[0].new_cluster")},
+				},
+			}, diags)
+		})
+	}
+}
+
+func TestValidateSingleNodeClusterPass(t *testing.T) {
+	passCases := []struct {
+		name string
+		spec *compute.ClusterSpec
+	}{
+		{
+			name: "single node cluster",
+			spec: &compute.ClusterSpec{
+				SparkConf: map[string]string{
+					"spark.databricks.cluster.profile": "singleNode",
+					"spark.master":                     "local[*]",
+				},
+				CustomTags: map[string]string{
+					"ResourceClass": "SingleNode",
+				},
+			},
+		},
+		{
+			name: "num workers is not zero",
+			spec: &compute.ClusterSpec{
+				NumWorkers: 1,
+			},
+		},
+		{
+			name: "autoscale is not nil",
+			spec: &compute.ClusterSpec{
+				Autoscale: &compute.AutoScale{
+					MinWorkers: 1,
+				},
+			},
+		},
+		{
+			name: "policy id is not empty",
+			spec: &compute.ClusterSpec{
+				PolicyId: "policy-abc",
+			},
+		},
+	}
+
+	ctx := context.Background()
+
+	// Test interactive clusters.
+	for _, tc := range passCases {
+		t.Run("interactive_"+tc.name, func(t *testing.T) {
+			b := &bundle.Bundle{
+				Config: config.Root{
+					Resources: config.Resources{
+						Clusters: map[string]*resources.Cluster{
+							"foo": {
+								ClusterSpec: tc.spec,
+							},
+						},
+					},
+				},
+			}
+
+			diags := bundle.ApplyReadOnly(ctx, bundle.ReadOnly(b), SingleNodeCluster())
+			assert.Empty(t, diags)
+		})
+	}
+
+	// Test new job clusters.
+	for _, tc := range passCases {
+		t.Run("job_"+tc.name, func(t *testing.T) {
+			b := &bundle.Bundle{
+				Config: config.Root{
+					Resources: config.Resources{
+						Jobs: map[string]*resources.Job{
+							"foo": {
+								JobSettings: &jobs.JobSettings{
+									JobClusters: []jobs.JobCluster{
+										{
+											NewCluster: *tc.spec,
+										},
+									},
+								},
+							},
+						},
+					},
+				},
+			}
+
+			diags := bundle.ApplyReadOnly(ctx, bundle.ReadOnly(b), SingleNodeCluster())
+			assert.Empty(t, diags)
+		})
+	}
+
+	// Test job task clusters.
+	for _, tc := range passCases {
+		t.Run("task_"+tc.name, func(t *testing.T) {
+			b := &bundle.Bundle{
+				Config: config.Root{
+					Resources: config.Resources{
+						Jobs: map[string]*resources.Job{
+							"foo": {
+								JobSettings: &jobs.JobSettings{
+									Tasks: []jobs.Task{
+										{
+											NewCluster: tc.spec,
+										},
+									},
+								},
+							},
+						},
+					},
+				},
+			}
+
+			diags := bundle.ApplyReadOnly(ctx, bundle.ReadOnly(b), SingleNodeCluster())
+			assert.Empty(t, diags)
+		})
+	}
+}
+
+func TestValidateSingleNodePipelineClustersFail(t *testing.T) {
+	failCases := []struct {
+		name string
+		spec pipelines.PipelineCluster
+	}{
+		{
+			name: "no tags or conf",
+			spec: pipelines.PipelineCluster{
+				DriverInstancePoolId: "abcd",
+			},
+		},
+		{
+			name: "no tags",
+			spec: pipelines.PipelineCluster{
+				SparkConf: map[string]string{
+					"spark.databricks.cluster.profile": "singleNode",
+					"spark.master":                     "local[*]",
+				},
+			},
+		},
+		{
+			name: "no conf",
+			spec: pipelines.PipelineCluster{
+				CustomTags: map[string]string{
+					"ResourceClass": "SingleNode",
+				},
+			},
+		},
+		{
+			name: "invalid spark cluster profile",
+			spec: pipelines.PipelineCluster{
+				SparkConf: map[string]string{
+					"spark.databricks.cluster.profile": "invalid",
+					"spark.master":                     "local[*]",
+				},
+				CustomTags: map[string]string{
+					"ResourceClass": "SingleNode",
+				},
+			},
+		},
+		{
+			name: "invalid spark.master",
+			spec: pipelines.PipelineCluster{
+				SparkConf: map[string]string{
+					"spark.databricks.cluster.profile": "singleNode",
+					"spark.master":                     "invalid",
+				},
+				CustomTags: map[string]string{
+					"ResourceClass": "SingleNode",
+				},
+			},
+		},
+		{
+			name: "invalid tags",
+			spec: pipelines.PipelineCluster{
+				SparkConf: map[string]string{
+					"spark.databricks.cluster.profile": "singleNode",
+					"spark.master":                     "local[*]",
+				},
+				CustomTags: map[string]string{
+					"ResourceClass": "invalid",
+				},
+			},
+		},
+	}
+
+	ctx := context.Background()
+
+	for _, tc := range failCases {
+		t.Run(tc.name, func(t *testing.T) {
+			b := &bundle.Bundle{
+				Config: config.Root{
+					Resources: config.Resources{
+						Pipelines: map[string]*resources.Pipeline{
+							"foo": {
+								PipelineSpec: &pipelines.PipelineSpec{
+									Clusters: []pipelines.PipelineCluster{
+										tc.spec,
+									},
+								},
+							},
+						},
+					},
+				},
+			}
+
+			bundletest.SetLocation(b, "resources.pipelines.foo.clusters[0]", []dyn.Location{{File: "d.yml", Line: 1, Column: 1}})
+
+			diags := bundle.ApplyReadOnly(ctx, bundle.ReadOnly(b), SingleNodeCluster())
+			assert.Equal(t, diag.Diagnostics{
+				{
+					Severity:  diag.Warning,
+					Summary:   singleNodeWarningSummary,
+					Detail:    singleNodeWarningDetail,
+					Locations: []dyn.Location{{File: "d.yml", Line: 1, Column: 1}},
+					Paths:     []dyn.Path{dyn.MustPathFromString("resources.pipelines.foo.clusters[0]")},
+				},
+			}, diags)
+		})
+	}
+}
+
+func TestValidateSingleNodePipelineClustersPass(t *testing.T) {
+	passCases := []struct {
+		name string
+		spec pipelines.PipelineCluster
+	}{
+		{
+			name: "single node cluster",
+			spec: pipelines.PipelineCluster{
+				SparkConf: map[string]string{
+					"spark.databricks.cluster.profile": "singleNode",
+					"spark.master":                     "local[*]",
+				},
+				CustomTags: map[string]string{
+					"ResourceClass": "SingleNode",
+				},
+			},
+		},
+		{
+			name: "num workers is not zero",
+			spec: pipelines.PipelineCluster{
+				NumWorkers: 1,
+			},
+		},
+		{
+			name: "autoscale is not nil",
+			spec: pipelines.PipelineCluster{
+				Autoscale: &pipelines.PipelineClusterAutoscale{
+					MaxWorkers: 3,
+				},
+			},
+		},
+		{
+			name: "policy id is not empty",
+			spec: pipelines.PipelineCluster{
+				PolicyId: "policy-abc",
+			},
+		},
+	}
+
+	ctx := context.Background()
+
+	for _, tc := range passCases {
+		t.Run(tc.name, func(t *testing.T) {
+			b := &bundle.Bundle{
+				Config: config.Root{
+					Resources: config.Resources{
+						Pipelines: map[string]*resources.Pipeline{
+							"foo": {
+								PipelineSpec: &pipelines.PipelineSpec{
+									Clusters: []pipelines.PipelineCluster{
+										tc.spec,
+									},
+								},
+							},
+						},
+					},
+				},
+			}
+
+			diags := bundle.ApplyReadOnly(ctx, bundle.ReadOnly(b), SingleNodeCluster())
+			assert.Empty(t, diags)
+		})
+	}
+}
--- a/bundle/config/validate/validate.go
+++ b/bundle/config/validate/validate.go
@ -36,6 +36,7 @@ func (v *validate) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics
 		ValidateSyncPatterns(),
 		JobTaskClusterSpec(),
 		ValidateFolderPermissions(),
+		SingleNodeCluster(),
 	))
 }