mirror of https://github.com/databricks/cli.git
162 lines
3.8 KiB
Go
162 lines
3.8 KiB
Go
|
package validate
|
||
|
|
||
|
import (
|
||
|
"context"
|
||
|
"fmt"
|
||
|
"strings"
|
||
|
|
||
|
"github.com/databricks/cli/bundle"
|
||
|
"github.com/databricks/cli/libs/diag"
|
||
|
"github.com/databricks/cli/libs/dyn"
|
||
|
"github.com/databricks/databricks-sdk-go/service/jobs"
|
||
|
)
|
||
|
|
||
|
// JobTaskClusterSpec validates that job tasks have cluster spec defined
|
||
|
// if task requires a cluster
|
||
|
func JobTaskClusterSpec() bundle.ReadOnlyMutator {
|
||
|
return &jobTaskClusterSpec{}
|
||
|
}
|
||
|
|
||
|
type jobTaskClusterSpec struct {
|
||
|
}
|
||
|
|
||
|
func (v *jobTaskClusterSpec) Name() string {
|
||
|
return "validate:job_task_cluster_spec"
|
||
|
}
|
||
|
|
||
|
func (v *jobTaskClusterSpec) Apply(ctx context.Context, rb bundle.ReadOnlyBundle) diag.Diagnostics {
|
||
|
diags := diag.Diagnostics{}
|
||
|
|
||
|
jobsPath := dyn.NewPath(dyn.Key("resources"), dyn.Key("jobs"))
|
||
|
|
||
|
for resourceName, job := range rb.Config().Resources.Jobs {
|
||
|
resourcePath := jobsPath.Append(dyn.Key(resourceName))
|
||
|
|
||
|
for taskIndex, task := range job.Tasks {
|
||
|
taskPath := resourcePath.Append(dyn.Key("tasks"), dyn.Index(taskIndex))
|
||
|
|
||
|
diags = diags.Extend(validateJobTask(rb, task, taskPath))
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return diags
|
||
|
}
|
||
|
|
||
|
func validateJobTask(rb bundle.ReadOnlyBundle, task jobs.Task, taskPath dyn.Path) diag.Diagnostics {
|
||
|
diags := diag.Diagnostics{}
|
||
|
|
||
|
var specified []string
|
||
|
var unspecified []string
|
||
|
|
||
|
if task.JobClusterKey != "" {
|
||
|
specified = append(specified, "job_cluster_key")
|
||
|
} else {
|
||
|
unspecified = append(unspecified, "job_cluster_key")
|
||
|
}
|
||
|
|
||
|
if task.EnvironmentKey != "" {
|
||
|
specified = append(specified, "environment_key")
|
||
|
} else {
|
||
|
unspecified = append(unspecified, "environment_key")
|
||
|
}
|
||
|
|
||
|
if task.ExistingClusterId != "" {
|
||
|
specified = append(specified, "existing_cluster_id")
|
||
|
} else {
|
||
|
unspecified = append(unspecified, "existing_cluster_id")
|
||
|
}
|
||
|
|
||
|
if task.NewCluster != nil {
|
||
|
specified = append(specified, "new_cluster")
|
||
|
} else {
|
||
|
unspecified = append(unspecified, "new_cluster")
|
||
|
}
|
||
|
|
||
|
if task.ForEachTask != nil {
|
||
|
forEachTaskPath := taskPath.Append(dyn.Key("for_each_task"), dyn.Key("task"))
|
||
|
|
||
|
diags = diags.Extend(validateJobTask(rb, task.ForEachTask.Task, forEachTaskPath))
|
||
|
}
|
||
|
|
||
|
if isComputeTask(task) && len(specified) == 0 {
|
||
|
if task.NotebookTask != nil {
|
||
|
// notebook tasks without cluster spec will use notebook environment
|
||
|
} else {
|
||
|
// path might be not very helpful, adding user-specified task key clarifies the context
|
||
|
detail := fmt.Sprintf(
|
||
|
"Task %q requires a cluster or an environment to run.\nSpecify one of the following fields: %s.",
|
||
|
task.TaskKey,
|
||
|
strings.Join(unspecified, ", "),
|
||
|
)
|
||
|
|
||
|
diags = diags.Append(diag.Diagnostic{
|
||
|
Severity: diag.Error,
|
||
|
Summary: "Missing required cluster or environment settings",
|
||
|
Detail: detail,
|
||
|
Locations: rb.Config().GetLocations(taskPath.String()),
|
||
|
Paths: []dyn.Path{taskPath},
|
||
|
})
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return diags
|
||
|
}
|
||
|
|
||
|
// isComputeTask returns true if the task runs on a cluster or serverless GC
|
||
|
func isComputeTask(task jobs.Task) bool {
|
||
|
if task.NotebookTask != nil {
|
||
|
// if warehouse_id is set, it's SQL notebook that doesn't need cluster or serverless GC
|
||
|
if task.NotebookTask.WarehouseId != "" {
|
||
|
return false
|
||
|
} else {
|
||
|
// task settings don't require specifying a cluster/serverless GC, but task itself can run on one
|
||
|
// we handle that case separately in validateJobTask
|
||
|
return true
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if task.PythonWheelTask != nil {
|
||
|
return true
|
||
|
}
|
||
|
|
||
|
if task.DbtTask != nil {
|
||
|
return true
|
||
|
}
|
||
|
|
||
|
if task.SparkJarTask != nil {
|
||
|
return true
|
||
|
}
|
||
|
|
||
|
if task.SparkSubmitTask != nil {
|
||
|
return true
|
||
|
}
|
||
|
|
||
|
if task.SparkPythonTask != nil {
|
||
|
return true
|
||
|
}
|
||
|
|
||
|
if task.SqlTask != nil {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
if task.PipelineTask != nil {
|
||
|
// while pipelines use clusters, pipeline tasks don't, they only trigger pipelines
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
if task.RunJobTask != nil {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
if task.ConditionTask != nil {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
// for each task doesn't use clusters, underlying task(s) can though
|
||
|
if task.ForEachTask != nil {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
return false
|
||
|
}
|