mirror of https://github.com/databricks/cli.git
Compare commits
8 Commits
a8168c2f61
...
1378da8086
Author | SHA1 | Date |
---|---|---|
Gleb Kanterov | 1378da8086 | |
shreyas-goenka | b323703c1b | |
Ilya Kuznetsov | 490dd058aa | |
Gleb Kanterov | bbcbffd7f4 | |
Gleb Kanterov | 6b4641c530 | |
Gleb Kanterov | 55c36bc2fa | |
Gleb Kanterov | 6fd701ec84 | |
Gleb Kanterov | 37d677c079 |
|
@ -38,7 +38,7 @@ jobs:
|
||||||
- name: Setup Python
|
- name: Setup Python
|
||||||
uses: actions/setup-python@v5
|
uses: actions/setup-python@v5
|
||||||
with:
|
with:
|
||||||
python-version: '3.9'
|
python-version: '3.10'
|
||||||
|
|
||||||
- name: Set go env
|
- name: Set go env
|
||||||
run: |
|
run: |
|
||||||
|
|
|
@ -225,9 +225,21 @@ func (m *applyPresets) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnos
|
||||||
if config.IsExplicitlyEnabled((b.Config.Presets.SourceLinkedDeployment)) {
|
if config.IsExplicitlyEnabled((b.Config.Presets.SourceLinkedDeployment)) {
|
||||||
isDatabricksWorkspace := dbr.RunsOnRuntime(ctx) && strings.HasPrefix(b.SyncRootPath, "/Workspace/")
|
isDatabricksWorkspace := dbr.RunsOnRuntime(ctx) && strings.HasPrefix(b.SyncRootPath, "/Workspace/")
|
||||||
if !isDatabricksWorkspace {
|
if !isDatabricksWorkspace {
|
||||||
|
target := b.Config.Bundle.Target
|
||||||
|
path := dyn.NewPath(dyn.Key("targets"), dyn.Key(target), dyn.Key("presets"), dyn.Key("source_linked_deployment"))
|
||||||
|
diags = diags.Append(
|
||||||
|
diag.Diagnostic{
|
||||||
|
Severity: diag.Warning,
|
||||||
|
Summary: "source-linked deployment is available only in the Databricks Workspace",
|
||||||
|
Paths: []dyn.Path{
|
||||||
|
path,
|
||||||
|
},
|
||||||
|
Locations: b.Config.GetLocations(path[2:].String()),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
disabled := false
|
disabled := false
|
||||||
b.Config.Presets.SourceLinkedDeployment = &disabled
|
b.Config.Presets.SourceLinkedDeployment = &disabled
|
||||||
diags = diags.Extend(diag.Warningf("source-linked deployment is available only in the Databricks Workspace"))
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -9,7 +9,9 @@ import (
|
||||||
"github.com/databricks/cli/bundle/config"
|
"github.com/databricks/cli/bundle/config"
|
||||||
"github.com/databricks/cli/bundle/config/mutator"
|
"github.com/databricks/cli/bundle/config/mutator"
|
||||||
"github.com/databricks/cli/bundle/config/resources"
|
"github.com/databricks/cli/bundle/config/resources"
|
||||||
|
"github.com/databricks/cli/bundle/internal/bundletest"
|
||||||
"github.com/databricks/cli/libs/dbr"
|
"github.com/databricks/cli/libs/dbr"
|
||||||
|
"github.com/databricks/cli/libs/dyn"
|
||||||
"github.com/databricks/databricks-sdk-go/service/catalog"
|
"github.com/databricks/databricks-sdk-go/service/catalog"
|
||||||
"github.com/databricks/databricks-sdk-go/service/jobs"
|
"github.com/databricks/databricks-sdk-go/service/jobs"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
|
@ -435,6 +437,7 @@ func TestApplyPresetsSourceLinkedDeployment(t *testing.T) {
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bundletest.SetLocation(b, "presets.source_linked_deployment", []dyn.Location{{File: "databricks.yml"}})
|
||||||
diags := bundle.Apply(tt.ctx, b, mutator.ApplyPresets())
|
diags := bundle.Apply(tt.ctx, b, mutator.ApplyPresets())
|
||||||
if diags.HasError() {
|
if diags.HasError() {
|
||||||
t.Fatalf("unexpected error: %v", diags)
|
t.Fatalf("unexpected error: %v", diags)
|
||||||
|
@ -442,6 +445,7 @@ func TestApplyPresetsSourceLinkedDeployment(t *testing.T) {
|
||||||
|
|
||||||
if tt.expectedWarning != "" {
|
if tt.expectedWarning != "" {
|
||||||
require.Equal(t, tt.expectedWarning, diags[0].Summary)
|
require.Equal(t, tt.expectedWarning, diags[0].Summary)
|
||||||
|
require.NotEmpty(t, diags[0].Locations)
|
||||||
}
|
}
|
||||||
|
|
||||||
require.Equal(t, tt.expectedValue, b.Config.Presets.SourceLinkedDeployment)
|
require.Equal(t, tt.expectedValue, b.Config.Presets.SourceLinkedDeployment)
|
||||||
|
|
|
@ -0,0 +1,137 @@
|
||||||
|
package validate
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/databricks/cli/bundle"
|
||||||
|
"github.com/databricks/cli/libs/diag"
|
||||||
|
"github.com/databricks/cli/libs/dyn"
|
||||||
|
"github.com/databricks/cli/libs/dyn/convert"
|
||||||
|
"github.com/databricks/cli/libs/log"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Validates that any single node clusters defined in the bundle are correctly configured.
|
||||||
|
func SingleNodeCluster() bundle.ReadOnlyMutator {
|
||||||
|
return &singleNodeCluster{}
|
||||||
|
}
|
||||||
|
|
||||||
|
type singleNodeCluster struct{}
|
||||||
|
|
||||||
|
func (m *singleNodeCluster) Name() string {
|
||||||
|
return "validate:SingleNodeCluster"
|
||||||
|
}
|
||||||
|
|
||||||
|
const singleNodeWarningDetail = `num_workers should be 0 only for single-node clusters. To create a
|
||||||
|
valid single node cluster please ensure that the following properties
|
||||||
|
are correctly set in the cluster specification:
|
||||||
|
|
||||||
|
spark_conf:
|
||||||
|
spark.databricks.cluster.profile: singleNode
|
||||||
|
spark.master: local[*]
|
||||||
|
|
||||||
|
custom_tags:
|
||||||
|
ResourceClass: SingleNode
|
||||||
|
`
|
||||||
|
|
||||||
|
const singleNodeWarningSummary = `Single node cluster is not correctly configured`
|
||||||
|
|
||||||
|
func showSingleNodeClusterWarning(ctx context.Context, v dyn.Value) bool {
|
||||||
|
// Check if the user has explicitly set the num_workers to 0. Skip the warning
|
||||||
|
// if that's not the case.
|
||||||
|
numWorkers, ok := v.Get("num_workers").AsInt()
|
||||||
|
if !ok || numWorkers > 0 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convenient type that contains the common fields from compute.ClusterSpec and
|
||||||
|
// pipelines.PipelineCluster that we are interested in.
|
||||||
|
type ClusterConf struct {
|
||||||
|
SparkConf map[string]string `json:"spark_conf"`
|
||||||
|
CustomTags map[string]string `json:"custom_tags"`
|
||||||
|
PolicyId string `json:"policy_id"`
|
||||||
|
}
|
||||||
|
|
||||||
|
conf := &ClusterConf{}
|
||||||
|
err := convert.ToTyped(conf, v)
|
||||||
|
if err != nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the policy id is set, we don't want to show the warning. This is because
|
||||||
|
// the user might have configured `spark_conf` and `custom_tags` correctly
|
||||||
|
// in their cluster policy.
|
||||||
|
if conf.PolicyId != "" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
profile, ok := conf.SparkConf["spark.databricks.cluster.profile"]
|
||||||
|
if !ok {
|
||||||
|
log.Debugf(ctx, "spark_conf spark.databricks.cluster.profile not found in single-node cluster spec")
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
if profile != "singleNode" {
|
||||||
|
log.Debugf(ctx, "spark_conf spark.databricks.cluster.profile is not singleNode in single-node cluster spec: %s", profile)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
master, ok := conf.SparkConf["spark.master"]
|
||||||
|
if !ok {
|
||||||
|
log.Debugf(ctx, "spark_conf spark.master not found in single-node cluster spec")
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
if !strings.HasPrefix(master, "local") {
|
||||||
|
log.Debugf(ctx, "spark_conf spark.master does not start with local in single-node cluster spec: %s", master)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
resourceClass, ok := conf.CustomTags["ResourceClass"]
|
||||||
|
if !ok {
|
||||||
|
log.Debugf(ctx, "custom_tag ResourceClass not found in single-node cluster spec")
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
if resourceClass != "SingleNode" {
|
||||||
|
log.Debugf(ctx, "custom_tag ResourceClass is not SingleNode in single-node cluster spec: %s", resourceClass)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *singleNodeCluster) Apply(ctx context.Context, rb bundle.ReadOnlyBundle) diag.Diagnostics {
|
||||||
|
diags := diag.Diagnostics{}
|
||||||
|
|
||||||
|
patterns := []dyn.Pattern{
|
||||||
|
// Interactive clusters
|
||||||
|
dyn.NewPattern(dyn.Key("resources"), dyn.Key("clusters"), dyn.AnyKey()),
|
||||||
|
// Job clusters
|
||||||
|
dyn.NewPattern(dyn.Key("resources"), dyn.Key("jobs"), dyn.AnyKey(), dyn.Key("job_clusters"), dyn.AnyIndex(), dyn.Key("new_cluster")),
|
||||||
|
// Job task clusters
|
||||||
|
dyn.NewPattern(dyn.Key("resources"), dyn.Key("jobs"), dyn.AnyKey(), dyn.Key("tasks"), dyn.AnyIndex(), dyn.Key("new_cluster")),
|
||||||
|
// Job for each task clusters
|
||||||
|
dyn.NewPattern(dyn.Key("resources"), dyn.Key("jobs"), dyn.AnyKey(), dyn.Key("tasks"), dyn.AnyIndex(), dyn.Key("for_each_task"), dyn.Key("task"), dyn.Key("new_cluster")),
|
||||||
|
// Pipeline clusters
|
||||||
|
dyn.NewPattern(dyn.Key("resources"), dyn.Key("pipelines"), dyn.AnyKey(), dyn.Key("clusters"), dyn.AnyIndex()),
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, p := range patterns {
|
||||||
|
_, err := dyn.MapByPattern(rb.Config().Value(), p, func(p dyn.Path, v dyn.Value) (dyn.Value, error) {
|
||||||
|
warning := diag.Diagnostic{
|
||||||
|
Severity: diag.Warning,
|
||||||
|
Summary: singleNodeWarningSummary,
|
||||||
|
Detail: singleNodeWarningDetail,
|
||||||
|
Locations: v.Locations(),
|
||||||
|
Paths: []dyn.Path{p},
|
||||||
|
}
|
||||||
|
|
||||||
|
if showSingleNodeClusterWarning(ctx, v) {
|
||||||
|
diags = append(diags, warning)
|
||||||
|
}
|
||||||
|
return v, nil
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
log.Debugf(ctx, "Error while applying single node cluster validation: %s", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return diags
|
||||||
|
}
|
|
@ -0,0 +1,566 @@
|
||||||
|
package validate
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/databricks/cli/bundle"
|
||||||
|
"github.com/databricks/cli/bundle/config"
|
||||||
|
"github.com/databricks/cli/bundle/config/resources"
|
||||||
|
"github.com/databricks/cli/bundle/internal/bundletest"
|
||||||
|
"github.com/databricks/cli/libs/diag"
|
||||||
|
"github.com/databricks/cli/libs/dyn"
|
||||||
|
"github.com/databricks/databricks-sdk-go/service/compute"
|
||||||
|
"github.com/databricks/databricks-sdk-go/service/jobs"
|
||||||
|
"github.com/databricks/databricks-sdk-go/service/pipelines"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
)
|
||||||
|
|
||||||
|
func failCases() []struct {
|
||||||
|
name string
|
||||||
|
sparkConf map[string]string
|
||||||
|
customTags map[string]string
|
||||||
|
} {
|
||||||
|
return []struct {
|
||||||
|
name string
|
||||||
|
sparkConf map[string]string
|
||||||
|
customTags map[string]string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "no tags or conf",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "no tags",
|
||||||
|
sparkConf: map[string]string{
|
||||||
|
"spark.databricks.cluster.profile": "singleNode",
|
||||||
|
"spark.master": "local[*]",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "no conf",
|
||||||
|
customTags: map[string]string{"ResourceClass": "SingleNode"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "invalid spark cluster profile",
|
||||||
|
sparkConf: map[string]string{
|
||||||
|
"spark.databricks.cluster.profile": "invalid",
|
||||||
|
"spark.master": "local[*]",
|
||||||
|
},
|
||||||
|
customTags: map[string]string{"ResourceClass": "SingleNode"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "invalid spark.master",
|
||||||
|
sparkConf: map[string]string{
|
||||||
|
"spark.databricks.cluster.profile": "singleNode",
|
||||||
|
"spark.master": "invalid",
|
||||||
|
},
|
||||||
|
customTags: map[string]string{"ResourceClass": "SingleNode"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "invalid tags",
|
||||||
|
sparkConf: map[string]string{
|
||||||
|
"spark.databricks.cluster.profile": "singleNode",
|
||||||
|
"spark.master": "local[*]",
|
||||||
|
},
|
||||||
|
customTags: map[string]string{"ResourceClass": "invalid"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "missing ResourceClass tag",
|
||||||
|
sparkConf: map[string]string{
|
||||||
|
"spark.databricks.cluster.profile": "singleNode",
|
||||||
|
"spark.master": "local[*]",
|
||||||
|
},
|
||||||
|
customTags: map[string]string{"what": "ever"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "missing spark.master",
|
||||||
|
sparkConf: map[string]string{
|
||||||
|
"spark.databricks.cluster.profile": "singleNode",
|
||||||
|
},
|
||||||
|
customTags: map[string]string{"ResourceClass": "SingleNode"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "missing spark.databricks.cluster.profile",
|
||||||
|
sparkConf: map[string]string{
|
||||||
|
"spark.master": "local[*]",
|
||||||
|
},
|
||||||
|
customTags: map[string]string{"ResourceClass": "SingleNode"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestValidateSingleNodeClusterFailForInteractiveClusters(t *testing.T) {
|
||||||
|
ctx := context.Background()
|
||||||
|
|
||||||
|
for _, tc := range failCases() {
|
||||||
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
|
b := &bundle.Bundle{
|
||||||
|
Config: config.Root{
|
||||||
|
Resources: config.Resources{
|
||||||
|
Clusters: map[string]*resources.Cluster{
|
||||||
|
"foo": {
|
||||||
|
ClusterSpec: &compute.ClusterSpec{
|
||||||
|
SparkConf: tc.sparkConf,
|
||||||
|
CustomTags: tc.customTags,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
bundletest.SetLocation(b, "resources.clusters.foo", []dyn.Location{{File: "a.yml", Line: 1, Column: 1}})
|
||||||
|
|
||||||
|
// We can't set num_workers to 0 explicitly in the typed configuration.
|
||||||
|
// Do it on the dyn.Value directly.
|
||||||
|
bundletest.Mutate(t, b, func(v dyn.Value) (dyn.Value, error) {
|
||||||
|
return dyn.Set(v, "resources.clusters.foo.num_workers", dyn.V(0))
|
||||||
|
})
|
||||||
|
diags := bundle.ApplyReadOnly(ctx, bundle.ReadOnly(b), SingleNodeCluster())
|
||||||
|
assert.Equal(t, diag.Diagnostics{
|
||||||
|
{
|
||||||
|
Severity: diag.Warning,
|
||||||
|
Summary: singleNodeWarningSummary,
|
||||||
|
Detail: singleNodeWarningDetail,
|
||||||
|
Locations: []dyn.Location{{File: "a.yml", Line: 1, Column: 1}},
|
||||||
|
Paths: []dyn.Path{dyn.NewPath(dyn.Key("resources"), dyn.Key("clusters"), dyn.Key("foo"))},
|
||||||
|
},
|
||||||
|
}, diags)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestValidateSingleNodeClusterFailForJobClusters(t *testing.T) {
|
||||||
|
ctx := context.Background()
|
||||||
|
|
||||||
|
for _, tc := range failCases() {
|
||||||
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
|
b := &bundle.Bundle{
|
||||||
|
Config: config.Root{
|
||||||
|
Resources: config.Resources{
|
||||||
|
Jobs: map[string]*resources.Job{
|
||||||
|
"foo": {
|
||||||
|
JobSettings: &jobs.JobSettings{
|
||||||
|
JobClusters: []jobs.JobCluster{
|
||||||
|
{
|
||||||
|
NewCluster: compute.ClusterSpec{
|
||||||
|
ClusterName: "my_cluster",
|
||||||
|
SparkConf: tc.sparkConf,
|
||||||
|
CustomTags: tc.customTags,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
bundletest.SetLocation(b, "resources.jobs.foo.job_clusters[0].new_cluster", []dyn.Location{{File: "b.yml", Line: 1, Column: 1}})
|
||||||
|
|
||||||
|
// We can't set num_workers to 0 explicitly in the typed configuration.
|
||||||
|
// Do it on the dyn.Value directly.
|
||||||
|
bundletest.Mutate(t, b, func(v dyn.Value) (dyn.Value, error) {
|
||||||
|
return dyn.Set(v, "resources.jobs.foo.job_clusters[0].new_cluster.num_workers", dyn.V(0))
|
||||||
|
})
|
||||||
|
|
||||||
|
diags := bundle.ApplyReadOnly(ctx, bundle.ReadOnly(b), SingleNodeCluster())
|
||||||
|
assert.Equal(t, diag.Diagnostics{
|
||||||
|
{
|
||||||
|
Severity: diag.Warning,
|
||||||
|
Summary: singleNodeWarningSummary,
|
||||||
|
Detail: singleNodeWarningDetail,
|
||||||
|
Locations: []dyn.Location{{File: "b.yml", Line: 1, Column: 1}},
|
||||||
|
Paths: []dyn.Path{dyn.MustPathFromString("resources.jobs.foo.job_clusters[0].new_cluster")},
|
||||||
|
},
|
||||||
|
}, diags)
|
||||||
|
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestValidateSingleNodeClusterFailForJobTaskClusters(t *testing.T) {
|
||||||
|
ctx := context.Background()
|
||||||
|
|
||||||
|
for _, tc := range failCases() {
|
||||||
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
|
b := &bundle.Bundle{
|
||||||
|
Config: config.Root{
|
||||||
|
Resources: config.Resources{
|
||||||
|
Jobs: map[string]*resources.Job{
|
||||||
|
"foo": {
|
||||||
|
JobSettings: &jobs.JobSettings{
|
||||||
|
Tasks: []jobs.Task{
|
||||||
|
{
|
||||||
|
NewCluster: &compute.ClusterSpec{
|
||||||
|
ClusterName: "my_cluster",
|
||||||
|
SparkConf: tc.sparkConf,
|
||||||
|
CustomTags: tc.customTags,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
bundletest.SetLocation(b, "resources.jobs.foo.tasks[0].new_cluster", []dyn.Location{{File: "c.yml", Line: 1, Column: 1}})
|
||||||
|
|
||||||
|
// We can't set num_workers to 0 explicitly in the typed configuration.
|
||||||
|
// Do it on the dyn.Value directly.
|
||||||
|
bundletest.Mutate(t, b, func(v dyn.Value) (dyn.Value, error) {
|
||||||
|
return dyn.Set(v, "resources.jobs.foo.tasks[0].new_cluster.num_workers", dyn.V(0))
|
||||||
|
})
|
||||||
|
|
||||||
|
diags := bundle.ApplyReadOnly(ctx, bundle.ReadOnly(b), SingleNodeCluster())
|
||||||
|
assert.Equal(t, diag.Diagnostics{
|
||||||
|
{
|
||||||
|
Severity: diag.Warning,
|
||||||
|
Summary: singleNodeWarningSummary,
|
||||||
|
Detail: singleNodeWarningDetail,
|
||||||
|
Locations: []dyn.Location{{File: "c.yml", Line: 1, Column: 1}},
|
||||||
|
Paths: []dyn.Path{dyn.MustPathFromString("resources.jobs.foo.tasks[0].new_cluster")},
|
||||||
|
},
|
||||||
|
}, diags)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestValidateSingleNodeClusterFailForPipelineClusters(t *testing.T) {
|
||||||
|
ctx := context.Background()
|
||||||
|
|
||||||
|
for _, tc := range failCases() {
|
||||||
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
|
b := &bundle.Bundle{
|
||||||
|
Config: config.Root{
|
||||||
|
Resources: config.Resources{
|
||||||
|
Pipelines: map[string]*resources.Pipeline{
|
||||||
|
"foo": {
|
||||||
|
PipelineSpec: &pipelines.PipelineSpec{
|
||||||
|
Clusters: []pipelines.PipelineCluster{
|
||||||
|
{
|
||||||
|
SparkConf: tc.sparkConf,
|
||||||
|
CustomTags: tc.customTags,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
bundletest.SetLocation(b, "resources.pipelines.foo.clusters[0]", []dyn.Location{{File: "d.yml", Line: 1, Column: 1}})
|
||||||
|
|
||||||
|
// We can't set num_workers to 0 explicitly in the typed configuration.
|
||||||
|
// Do it on the dyn.Value directly.
|
||||||
|
bundletest.Mutate(t, b, func(v dyn.Value) (dyn.Value, error) {
|
||||||
|
return dyn.Set(v, "resources.pipelines.foo.clusters[0].num_workers", dyn.V(0))
|
||||||
|
})
|
||||||
|
|
||||||
|
diags := bundle.ApplyReadOnly(ctx, bundle.ReadOnly(b), SingleNodeCluster())
|
||||||
|
assert.Equal(t, diag.Diagnostics{
|
||||||
|
{
|
||||||
|
Severity: diag.Warning,
|
||||||
|
Summary: singleNodeWarningSummary,
|
||||||
|
Detail: singleNodeWarningDetail,
|
||||||
|
Locations: []dyn.Location{{File: "d.yml", Line: 1, Column: 1}},
|
||||||
|
Paths: []dyn.Path{dyn.MustPathFromString("resources.pipelines.foo.clusters[0]")},
|
||||||
|
},
|
||||||
|
}, diags)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestValidateSingleNodeClusterFailForJobForEachTaskCluster(t *testing.T) {
|
||||||
|
ctx := context.Background()
|
||||||
|
|
||||||
|
for _, tc := range failCases() {
|
||||||
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
|
b := &bundle.Bundle{
|
||||||
|
Config: config.Root{
|
||||||
|
Resources: config.Resources{
|
||||||
|
Jobs: map[string]*resources.Job{
|
||||||
|
"foo": {
|
||||||
|
JobSettings: &jobs.JobSettings{
|
||||||
|
Tasks: []jobs.Task{
|
||||||
|
{
|
||||||
|
ForEachTask: &jobs.ForEachTask{
|
||||||
|
Task: jobs.Task{
|
||||||
|
NewCluster: &compute.ClusterSpec{
|
||||||
|
ClusterName: "my_cluster",
|
||||||
|
SparkConf: tc.sparkConf,
|
||||||
|
CustomTags: tc.customTags,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
bundletest.SetLocation(b, "resources.jobs.foo.tasks[0].for_each_task.task.new_cluster", []dyn.Location{{File: "e.yml", Line: 1, Column: 1}})
|
||||||
|
|
||||||
|
// We can't set num_workers to 0 explicitly in the typed configuration.
|
||||||
|
// Do it on the dyn.Value directly.
|
||||||
|
bundletest.Mutate(t, b, func(v dyn.Value) (dyn.Value, error) {
|
||||||
|
return dyn.Set(v, "resources.jobs.foo.tasks[0].for_each_task.task.new_cluster.num_workers", dyn.V(0))
|
||||||
|
})
|
||||||
|
|
||||||
|
diags := bundle.ApplyReadOnly(ctx, bundle.ReadOnly(b), SingleNodeCluster())
|
||||||
|
assert.Equal(t, diag.Diagnostics{
|
||||||
|
{
|
||||||
|
Severity: diag.Warning,
|
||||||
|
Summary: singleNodeWarningSummary,
|
||||||
|
Detail: singleNodeWarningDetail,
|
||||||
|
Locations: []dyn.Location{{File: "e.yml", Line: 1, Column: 1}},
|
||||||
|
Paths: []dyn.Path{dyn.MustPathFromString("resources.jobs.foo.tasks[0].for_each_task.task.new_cluster")},
|
||||||
|
},
|
||||||
|
}, diags)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func passCases() []struct {
|
||||||
|
name string
|
||||||
|
numWorkers *int
|
||||||
|
sparkConf map[string]string
|
||||||
|
customTags map[string]string
|
||||||
|
policyId string
|
||||||
|
} {
|
||||||
|
zero := 0
|
||||||
|
one := 1
|
||||||
|
|
||||||
|
return []struct {
|
||||||
|
name string
|
||||||
|
numWorkers *int
|
||||||
|
sparkConf map[string]string
|
||||||
|
customTags map[string]string
|
||||||
|
policyId string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "single node cluster",
|
||||||
|
sparkConf: map[string]string{
|
||||||
|
"spark.databricks.cluster.profile": "singleNode",
|
||||||
|
"spark.master": "local[*]",
|
||||||
|
},
|
||||||
|
customTags: map[string]string{
|
||||||
|
"ResourceClass": "SingleNode",
|
||||||
|
},
|
||||||
|
numWorkers: &zero,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "num workers is not zero",
|
||||||
|
numWorkers: &one,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "num workers is not set",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "policy id is not empty",
|
||||||
|
policyId: "policy-abc",
|
||||||
|
numWorkers: &zero,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestValidateSingleNodeClusterPassInteractiveClusters(t *testing.T) {
|
||||||
|
ctx := context.Background()
|
||||||
|
|
||||||
|
for _, tc := range passCases() {
|
||||||
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
|
b := &bundle.Bundle{
|
||||||
|
Config: config.Root{
|
||||||
|
Resources: config.Resources{
|
||||||
|
Clusters: map[string]*resources.Cluster{
|
||||||
|
"foo": {
|
||||||
|
ClusterSpec: &compute.ClusterSpec{
|
||||||
|
SparkConf: tc.sparkConf,
|
||||||
|
CustomTags: tc.customTags,
|
||||||
|
PolicyId: tc.policyId,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
if tc.numWorkers != nil {
|
||||||
|
bundletest.Mutate(t, b, func(v dyn.Value) (dyn.Value, error) {
|
||||||
|
return dyn.Set(v, "resources.clusters.foo.num_workers", dyn.V(*tc.numWorkers))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
diags := bundle.ApplyReadOnly(ctx, bundle.ReadOnly(b), SingleNodeCluster())
|
||||||
|
assert.Empty(t, diags)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestValidateSingleNodeClusterPassJobClusters(t *testing.T) {
|
||||||
|
ctx := context.Background()
|
||||||
|
|
||||||
|
for _, tc := range passCases() {
|
||||||
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
|
b := &bundle.Bundle{
|
||||||
|
Config: config.Root{
|
||||||
|
Resources: config.Resources{
|
||||||
|
Jobs: map[string]*resources.Job{
|
||||||
|
"foo": {
|
||||||
|
JobSettings: &jobs.JobSettings{
|
||||||
|
JobClusters: []jobs.JobCluster{
|
||||||
|
{
|
||||||
|
NewCluster: compute.ClusterSpec{
|
||||||
|
ClusterName: "my_cluster",
|
||||||
|
SparkConf: tc.sparkConf,
|
||||||
|
CustomTags: tc.customTags,
|
||||||
|
PolicyId: tc.policyId,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
if tc.numWorkers != nil {
|
||||||
|
bundletest.Mutate(t, b, func(v dyn.Value) (dyn.Value, error) {
|
||||||
|
return dyn.Set(v, "resources.jobs.foo.job_clusters[0].new_cluster.num_workers", dyn.V(*tc.numWorkers))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
diags := bundle.ApplyReadOnly(ctx, bundle.ReadOnly(b), SingleNodeCluster())
|
||||||
|
assert.Empty(t, diags)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestValidateSingleNodeClusterPassJobTaskClusters(t *testing.T) {
|
||||||
|
ctx := context.Background()
|
||||||
|
|
||||||
|
for _, tc := range passCases() {
|
||||||
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
|
b := &bundle.Bundle{
|
||||||
|
Config: config.Root{
|
||||||
|
Resources: config.Resources{
|
||||||
|
Jobs: map[string]*resources.Job{
|
||||||
|
"foo": {
|
||||||
|
JobSettings: &jobs.JobSettings{
|
||||||
|
Tasks: []jobs.Task{
|
||||||
|
{
|
||||||
|
NewCluster: &compute.ClusterSpec{
|
||||||
|
ClusterName: "my_cluster",
|
||||||
|
SparkConf: tc.sparkConf,
|
||||||
|
CustomTags: tc.customTags,
|
||||||
|
PolicyId: tc.policyId,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
if tc.numWorkers != nil {
|
||||||
|
bundletest.Mutate(t, b, func(v dyn.Value) (dyn.Value, error) {
|
||||||
|
return dyn.Set(v, "resources.jobs.foo.tasks[0].new_cluster.num_workers", dyn.V(*tc.numWorkers))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
diags := bundle.ApplyReadOnly(ctx, bundle.ReadOnly(b), SingleNodeCluster())
|
||||||
|
assert.Empty(t, diags)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestValidateSingleNodeClusterPassPipelineClusters(t *testing.T) {
|
||||||
|
ctx := context.Background()
|
||||||
|
|
||||||
|
for _, tc := range passCases() {
|
||||||
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
|
b := &bundle.Bundle{
|
||||||
|
Config: config.Root{
|
||||||
|
Resources: config.Resources{
|
||||||
|
Pipelines: map[string]*resources.Pipeline{
|
||||||
|
"foo": {
|
||||||
|
PipelineSpec: &pipelines.PipelineSpec{
|
||||||
|
Clusters: []pipelines.PipelineCluster{
|
||||||
|
{
|
||||||
|
SparkConf: tc.sparkConf,
|
||||||
|
CustomTags: tc.customTags,
|
||||||
|
PolicyId: tc.policyId,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
if tc.numWorkers != nil {
|
||||||
|
bundletest.Mutate(t, b, func(v dyn.Value) (dyn.Value, error) {
|
||||||
|
return dyn.Set(v, "resources.pipelines.foo.clusters[0].num_workers", dyn.V(*tc.numWorkers))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
diags := bundle.ApplyReadOnly(ctx, bundle.ReadOnly(b), SingleNodeCluster())
|
||||||
|
assert.Empty(t, diags)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestValidateSingleNodeClusterPassJobForEachTaskCluster(t *testing.T) {
|
||||||
|
ctx := context.Background()
|
||||||
|
|
||||||
|
for _, tc := range passCases() {
|
||||||
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
|
b := &bundle.Bundle{
|
||||||
|
Config: config.Root{
|
||||||
|
Resources: config.Resources{
|
||||||
|
Jobs: map[string]*resources.Job{
|
||||||
|
"foo": {
|
||||||
|
JobSettings: &jobs.JobSettings{
|
||||||
|
Tasks: []jobs.Task{
|
||||||
|
{
|
||||||
|
ForEachTask: &jobs.ForEachTask{
|
||||||
|
Task: jobs.Task{
|
||||||
|
NewCluster: &compute.ClusterSpec{
|
||||||
|
ClusterName: "my_cluster",
|
||||||
|
SparkConf: tc.sparkConf,
|
||||||
|
CustomTags: tc.customTags,
|
||||||
|
PolicyId: tc.policyId,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
if tc.numWorkers != nil {
|
||||||
|
bundletest.Mutate(t, b, func(v dyn.Value) (dyn.Value, error) {
|
||||||
|
return dyn.Set(v, "resources.jobs.foo.tasks[0].for_each_task.task.new_cluster.num_workers", dyn.V(*tc.numWorkers))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
diags := bundle.ApplyReadOnly(ctx, bundle.ReadOnly(b), SingleNodeCluster())
|
||||||
|
assert.Empty(t, diags)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
|
@ -36,6 +36,7 @@ func (v *validate) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics
|
||||||
ValidateSyncPatterns(),
|
ValidateSyncPatterns(),
|
||||||
JobTaskClusterSpec(),
|
JobTaskClusterSpec(),
|
||||||
ValidateFolderPermissions(),
|
ValidateFolderPermissions(),
|
||||||
|
SingleNodeCluster(),
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,15 @@
|
||||||
|
bundle:
|
||||||
|
name: python-import-dataclass-no-wheel
|
||||||
|
|
||||||
|
experimental:
|
||||||
|
pydabs:
|
||||||
|
enabled: true
|
||||||
|
import:
|
||||||
|
- "my_job"
|
||||||
|
|
||||||
|
variables:
|
||||||
|
default_cluster_spec:
|
||||||
|
type: complex
|
||||||
|
value:
|
||||||
|
num_workers: 1
|
||||||
|
spark_version: "15.4.x-scala2.12"
|
|
@ -0,0 +1,2 @@
|
||||||
|
# Databricks Notebook Source
|
||||||
|
1+1
|
|
@ -0,0 +1,22 @@
|
||||||
|
from databricks.bundles.jobs import Job, Task, NotebookTask, JobCluster
|
||||||
|
from databricks.bundles.variables import Bundle
|
||||||
|
|
||||||
|
my_job = Job(
|
||||||
|
name="Test Job",
|
||||||
|
resource_name="my_job",
|
||||||
|
job_clusters=[
|
||||||
|
JobCluster(
|
||||||
|
job_cluster_key="my_cluster",
|
||||||
|
new_cluster=Bundle.variables.default_cluster_spec,
|
||||||
|
),
|
||||||
|
],
|
||||||
|
tasks=[
|
||||||
|
Task(
|
||||||
|
task_key="my_notebook_task",
|
||||||
|
job_cluster_key="my_cluster",
|
||||||
|
notebook_task=NotebookTask(
|
||||||
|
notebook_path="notebooks/my_notebook.py",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
],
|
||||||
|
)
|
|
@ -0,0 +1,95 @@
|
||||||
|
package config_tests
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"os/exec"
|
||||||
|
pathlib "path"
|
||||||
|
"runtime"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/databricks/cli/bundle/config/resources"
|
||||||
|
"github.com/databricks/cli/libs/dyn"
|
||||||
|
"github.com/databricks/databricks-sdk-go/service/jobs"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
"golang.org/x/exp/maps"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestPythonImport_dataclass_no_wheel(t *testing.T) {
|
||||||
|
activateVEnv(t)
|
||||||
|
setPythonPath(t, "python_import/dataclass_no_wheel/src")
|
||||||
|
|
||||||
|
expected := &resources.Job{
|
||||||
|
JobSettings: &jobs.JobSettings{
|
||||||
|
Name: "Test Job",
|
||||||
|
JobClusters: []jobs.JobCluster{
|
||||||
|
{
|
||||||
|
JobClusterKey: "my_cluster",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Tasks: []jobs.Task{
|
||||||
|
{
|
||||||
|
NotebookTask: &jobs.NotebookTask{
|
||||||
|
NotebookPath: "notebooks/my_notebook.py",
|
||||||
|
},
|
||||||
|
JobClusterKey: "my_cluster",
|
||||||
|
TaskKey: "my_notebook_task",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
b := load(t, "./python_import/dataclass_no_wheel")
|
||||||
|
|
||||||
|
assert.Equal(t, []string{"my_job"}, maps.Keys(b.Config.Resources.Jobs))
|
||||||
|
|
||||||
|
myJob := b.Config.Resources.Jobs["my_job"]
|
||||||
|
assert.Equal(t, expected, myJob)
|
||||||
|
|
||||||
|
// NewCluster is reference to a variable and needs to be checked separately
|
||||||
|
err := b.Config.Mutate(func(value dyn.Value) (dyn.Value, error) {
|
||||||
|
path := dyn.MustPathFromString("resources.jobs.my_job.job_clusters[0].new_cluster")
|
||||||
|
value, err := dyn.GetByPath(value, path)
|
||||||
|
if err != nil {
|
||||||
|
return dyn.InvalidValue, err
|
||||||
|
}
|
||||||
|
|
||||||
|
assert.Equal(t, "${var.default_cluster_spec}", value.AsAny())
|
||||||
|
|
||||||
|
return value, nil
|
||||||
|
})
|
||||||
|
require.NoError(t, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
func setPythonPath(t *testing.T, path string) {
|
||||||
|
wd, err := os.Getwd()
|
||||||
|
require.NoError(t, err)
|
||||||
|
t.Setenv("PYTHONPATH", pathlib.Join(wd, path))
|
||||||
|
}
|
||||||
|
|
||||||
|
func activateVEnv(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
venvDir := pathlib.Join(dir, "venv")
|
||||||
|
|
||||||
|
err := exec.Command("python3", "-m", "venv", venvDir).Run()
|
||||||
|
require.NoError(t, err, "failed to create venv")
|
||||||
|
|
||||||
|
// we don't have shell to activate venv, updating PATH is enough
|
||||||
|
|
||||||
|
var venvBinDir string
|
||||||
|
if runtime.GOOS == "windows" {
|
||||||
|
venvBinDir = pathlib.Join(venvDir, "Scripts")
|
||||||
|
t.Setenv("PATH", venvBinDir+";"+os.Getenv("PATH"))
|
||||||
|
} else {
|
||||||
|
venvBinDir = pathlib.Join(venvDir, "bin")
|
||||||
|
t.Setenv("PATH", venvBinDir+":"+os.Getenv("PATH"))
|
||||||
|
}
|
||||||
|
|
||||||
|
err = exec.Command(
|
||||||
|
pathlib.Join(venvBinDir, "pip"),
|
||||||
|
"install",
|
||||||
|
"databricks-pydabs==0.5.1",
|
||||||
|
).Run()
|
||||||
|
require.NoError(t, err, "failed to install databricks-pydabs")
|
||||||
|
}
|
Loading…
Reference in New Issue