mirror of https://github.com/databricks/cli.git
Fixed spark version check for clusters defined in the same bundle (#2374)
## Changes

Previously, using Python wheel tasks with compute referring to an interactive cluster defined in the same bundle would produce a warning like the one below:

```
GET /api/2.1/clusters/get?cluster_id=${resources.clusters.development_cluster.id}
< HTTP/2.0 400 Bad Request
< {
<   "error_code": "INVALID_PARAMETER_VALUE",
<   "message": "Cluster ${resources.clusters.development_cluster.id} does not exist"
< }
pid=14465 mutator=seq mutator=initialize mutator=seq mutator=PythonWrapperWarning sdk=true
```

This PR fixes the issue by checking the Spark version for such clusters against their bundle configuration instead of making API calls.

## Tests

Added an acceptance test.
This commit is contained in:
parent
9bacf221d7
commit
cdea775bd2
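The heart of the change is resolving a `${resources.clusters.<cluster_key>.id}` reference against the bundle's own configuration instead of calling the clusters API. Below is a minimal, self-contained sketch of that lookup: plain string splitting stands in for `dynvar.PureReferenceToPath`, and the `clusters` map is a hypothetical stand-in for `b.Config.Resources.Clusters`.

```go
package main

import (
	"fmt"
	"strings"
)

// sparkVersionForClusterRef resolves a "${resources.clusters.<key>.id}"
// reference against clusters defined in the same bundle, mirroring the
// lookup added to hasIncompatibleWheelTasks in this commit.
func sparkVersionForClusterRef(ref string, clusters map[string]string) (string, bool) {
	if !strings.HasPrefix(ref, "${") || !strings.HasSuffix(ref, "}") {
		return "", false // a literal cluster id; the real code calls the clusters API here
	}
	path := strings.Split(strings.TrimSuffix(strings.TrimPrefix(ref, "${"), "}"), ".")
	if len(path) < 3 || path[0] != "resources" || path[1] != "clusters" {
		return "", false // not a reference to a cluster defined in this bundle
	}
	version, ok := clusters[path[2]] // path[2] is the cluster key
	return version, ok
}

func main() {
	clusters := map[string]string{"interactive_cluster": "12.2.x-cpu-ml-scala2.12"}
	v, ok := sparkVersionForClusterRef("${resources.clusters.interactive_cluster.id}", clusters)
	fmt.Println(v, ok) // prints: 12.2.x-cpu-ml-scala2.12 true
}
```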
@@ -0,0 +1,37 @@
```yaml
bundle:
  name: trampoline_warning_message

targets:
  dev:
    mode: development
    default: true

  prod:
    resources:
      clusters:
        interactive_cluster:
          spark_version: 14.2.x-cpu-ml-scala2.12

resources:
  clusters:
    interactive_cluster:
      cluster_name: jobs-as-code-all-purpose-cluster
      spark_version: 12.2.x-cpu-ml-scala2.12
      node_type_id: r5d.8xlarge
      autotermination_minutes: 30
      autoscale:
        min_workers: 1
        max_workers: 1
      driver_node_type_id: r5d.8xlarge
  jobs:
    whl:
      name: "wheel-job"
      tasks:
        - task_key: test_task
          python_wheel_task:
            package_name: my_package
            entry_point: my_module.my_function
          existing_cluster_id: ${resources.clusters.interactive_cluster.id}
          libraries:
            - whl: ./dist/*.whl
```
@@ -0,0 +1,22 @@
```

>>> errcode [CLI] bundle validate -t dev
Error: Python wheel tasks require compute with DBR 13.3+ to include local libraries. Please change your cluster configuration or use the experimental 'python_wheel_wrapper' setting. See https://docs.databricks.com/dev-tools/bundles/python-wheel.html for more information.

Name: trampoline_warning_message
Target: dev
Workspace:
  User: [USERNAME]
  Path: /Workspace/Users/[USERNAME]/.bundle/trampoline_warning_message/dev

Found 1 error

Exit code: 1

>>> errcode [CLI] bundle validate -t prod
Name: trampoline_warning_message
Target: prod
Workspace:
  User: [USERNAME]
  Path: /Workspace/Users/[USERNAME]/.bundle/trampoline_warning_message/prod

Validation OK!
```
@@ -0,0 +1,2 @@
```
trace errcode $CLI bundle validate -t dev
trace errcode $CLI bundle validate -t prod
```
@@ -0,0 +1,20 @@
```yaml
bundle:
  name: trampoline_warning_message_with_new_spark

targets:
  dev:
    mode: development
    default: true

resources:
  jobs:
    whl:
      name: "wheel-job"
      tasks:
        - task_key: test_task
          python_wheel_task:
            package_name: my_package
            entry_point: my_module.my_function
          existing_cluster_id: "some-test-cluster-id"
          libraries:
            - whl: ./dist/*.whl
```
@@ -0,0 +1,9 @@
```

>>> errcode [CLI] bundle validate
Name: trampoline_warning_message_with_new_spark
Target: dev
Workspace:
  User: [USERNAME]
  Path: /Workspace/Users/[USERNAME]/.bundle/trampoline_warning_message_with_new_spark/dev

Validation OK!
```
@@ -0,0 +1 @@
```
trace errcode $CLI bundle validate
```
@@ -0,0 +1,16 @@
```toml
# Since we use an existing cluster id value which is not available in cloud envs, we need to stub the request
# and run this test only locally
LocalOnly = true

[[Server]]
Pattern = "GET /api/2.1/clusters/get"
Response.Body = '''
{
  "cluster_id": "some-cluster-id",
  "state": "RUNNING",
  "spark_version": "13.3.x-scala2.12",
  "node_type_id": "Standard_DS3_v2",
  "driver_node_type_id": "Standard_DS3_v2",
  "cluster_name": "some-cluster-name",
  "spark_context_id": 12345
}'''
```
@@ -0,0 +1,20 @@
```yaml
bundle:
  name: trampoline_warning_message_with_old_spark

targets:
  dev:
    mode: development
    default: true

resources:
  jobs:
    whl:
      name: "wheel-job"
      tasks:
        - task_key: test_task
          python_wheel_task:
            package_name: my_package
            entry_point: my_module.my_function
          existing_cluster_id: "some-test-cluster-id"
          libraries:
            - whl: ./dist/*.whl
```
@@ -0,0 +1,13 @@
```

>>> errcode [CLI] bundle validate
Error: Python wheel tasks require compute with DBR 13.3+ to include local libraries. Please change your cluster configuration or use the experimental 'python_wheel_wrapper' setting. See https://docs.databricks.com/dev-tools/bundles/python-wheel.html for more information.

Name: trampoline_warning_message_with_old_spark
Target: dev
Workspace:
  User: [USERNAME]
  Path: /Workspace/Users/[USERNAME]/.bundle/trampoline_warning_message_with_old_spark/dev

Found 1 error

Exit code: 1
```
@@ -0,0 +1 @@
```
trace errcode $CLI bundle validate
```
@@ -0,0 +1,16 @@
```toml
# Since we use an existing cluster id value which is not available in cloud envs, we need to stub the request
# and run this test only locally
LocalOnly = true

[[Server]]
Pattern = "GET /api/2.1/clusters/get"
Response.Body = '''
{
  "cluster_id": "some-cluster-id",
  "state": "RUNNING",
  "spark_version": "7.3.x-scala2.12",
  "node_type_id": "Standard_DS3_v2",
  "driver_node_type_id": "Standard_DS3_v2",
  "cluster_name": "some-cluster-name",
  "spark_context_id": 12345
}'''
```
@@ -9,6 +9,7 @@ import (
```diff
 	"github.com/databricks/cli/bundle/config"
 	"github.com/databricks/cli/bundle/libraries"
 	"github.com/databricks/cli/libs/diag"
+	"github.com/databricks/cli/libs/dyn/dynvar"
 	"github.com/databricks/cli/libs/log"
 	"github.com/databricks/databricks-sdk-go"
 	"golang.org/x/mod/semver"
```
@@ -60,11 +61,37 @@ func hasIncompatibleWheelTasks(ctx context.Context, b *bundle.Bundle) bool {
```diff
 		}

 		if task.ExistingClusterId != "" {
-			version, err := getSparkVersionForCluster(ctx, b.WorkspaceClient(), task.ExistingClusterId)
-			// If there's error getting spark version for cluster, do not mark it as incompatible
-			if err != nil {
-				log.Warnf(ctx, "unable to get spark version for cluster %s, err: %s", task.ExistingClusterId, err.Error())
-				return false
+			var version string
+			var err error
+			// If the cluster id is a variable and it's not resolved, it references a cluster defined in the same bundle,
+			// so we can get the version from the cluster definition.
+			// It's defined in the form resources.clusters.<cluster_key>.id
+			if strings.HasPrefix(task.ExistingClusterId, "${") {
+				p, ok := dynvar.PureReferenceToPath(task.ExistingClusterId)
+				if !ok || len(p) < 3 {
+					log.Warnf(ctx, "unable to parse cluster key from %s", task.ExistingClusterId)
+					return false
+				}
+
+				if p[0].Key() != "resources" || p[1].Key() != "clusters" {
+					log.Warnf(ctx, "incorrect variable reference for cluster id %s", task.ExistingClusterId)
+					return false
+				}
+
+				clusterKey := p[2].Key()
+				cluster, ok := b.Config.Resources.Clusters[clusterKey]
+				if !ok {
+					log.Warnf(ctx, "unable to find cluster with key %s", clusterKey)
+					return false
+				}
+				version = cluster.SparkVersion
+			} else {
+				version, err = getSparkVersionForCluster(ctx, b.WorkspaceClient(), task.ExistingClusterId)
+				// If there's an error getting the spark version for the cluster, do not mark it as incompatible
+				if err != nil {
+					log.Warnf(ctx, "unable to get spark version for cluster %s, err: %s", task.ExistingClusterId, err.Error())
+					return false
+				}
 			}

 			if lowerThanExpectedVersion(version) {
```
@@ -82,7 +109,7 @@ func lowerThanExpectedVersion(sparkVersion string) bool {
```diff
 		return false
 	}

-	if parts[1][0] == 'x' { // treat versions like 13.x as the very latest minor (13.99)
+	if len(parts[1]) > 0 && parts[1][0] == 'x' { // treat versions like 13.x as the very latest minor (13.99)
 		parts[1] = "99"
 	}
```
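The `len(parts[1]) > 0` guard matters because a malformed version such as `"1."` leaves an empty minor component, and indexing it would panic; the new `"1."` test case below exercises exactly that. A quick sketch of the failure mode, assuming the function splits the version string on `.` as the surrounding context suggests:

```go
package main

import (
	"fmt"
	"strings"
)

func main() {
	parts := strings.Split("1.", ".") // yields ["1", ""]
	fmt.Printf("%q\n", parts)

	// Without the guard, parts[1][0] panics with "index out of range"
	// because parts[1] is the empty string.
	safe := len(parts[1]) > 0 && parts[1][0] == 'x'
	fmt.Println(safe) // false, and no panic thanks to short-circuit evaluation
}
```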
@@ -346,6 +346,7 @@ func TestSparkVersionLowerThanExpected(t *testing.T) {
```diff
 		"13.x-rc-scala-2.12":              false,
 		"client.1.10-scala2.12":           false,
 		"latest-stable-gpu-scala2.11":     false,
+		"1.":                              false,
 		"10.4.x-aarch64-photon-scala2.12": true,
 		"10.4.x-scala2.12":                true,
 		"13.0.x-scala2.12":                true,
```