From 1e1cfa90a90caab202834f62083a0df8fd479c3f Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Wed, 12 Mar 2025 16:49:24 +0100 Subject: [PATCH] acc: Fix interactive_cluster test to use correct config; add NODE_TYPE_ID (#2478) This is a follow-up to #2471, where an incorrect config was used in the interactive_cluster test. ## Changes - Fixed interactive_cluster to use the proper config; it was accidentally referring to the config from ../base. - Add $NODE_TYPE_ID env var and replacement to acceptance tests; this is necessary for the interactive_cluster test. - Disable acceptance/bundle/override on cloud. This started failing because it has a real node type that gets replaced with NODE_TYPE_ID, but only in the AWS env. Since the test is focused on config merging, there is no need to run it against real workspaces. - Modify all tests in integration_whl to print the rendered databricks.yml, to prevent this kind of error. --- acceptance/acceptance_test.go | 23 ++++++++++++ .../bundle/integration_whl/base/output.txt | 34 ++++++++++++++++- acceptance/bundle/integration_whl/base/script | 3 +- .../integration_whl/custom_params/output.txt | 34 ++++++++++++++++- .../integration_whl/custom_params/script | 3 +- .../interactive_cluster/output.txt | 36 +++++++++++++++++- .../interactive_cluster/script | 5 ++- .../bundle/integration_whl/wrapper/output.txt | 37 ++++++++++++++++++- .../bundle/integration_whl/wrapper/script | 3 +- .../wrapper_custom_params/output.txt | 37 ++++++++++++++++++- .../wrapper_custom_params/script | 3 +- acceptance/bundle/override/test.toml | 1 + 12 files changed, 208 insertions(+), 11 deletions(-) create mode 100644 acceptance/bundle/override/test.toml diff --git a/acceptance/acceptance_test.go b/acceptance/acceptance_test.go index 489abb09b..549b38b5b 100644 --- a/acceptance/acceptance_test.go +++ b/acceptance/acceptance_test.go @@ -173,6 +173,10 @@ func testAccept(t *testing.T, InprocessMode bool, singleTest string) int { // Matches defaultSparkVersion in 
../integration/bundle/helpers_test.go t.Setenv("DEFAULT_SPARK_VERSION", "13.3.x-snapshot-scala2.12") + nodeTypeID := getNodeTypeID(cloudEnv) + t.Setenv("NODE_TYPE_ID", nodeTypeID) + repls.Set(nodeTypeID, "[NODE_TYPE_ID]") + testDirs := getTests(t) require.NotEmpty(t, testDirs) @@ -799,3 +803,22 @@ func runWithLog(t *testing.T, cmd *exec.Cmd, out *os.File, tail bool) error { return <-processErrCh } + +func getNodeTypeID(cloudEnv string) string { + switch cloudEnv { + // no idea why, but + // aws-prod-ucws sets CLOUD_ENV to "ucws" + // gcp-prod-ucws sets CLOUD_ENV to "gcp-ucws" + // azure-prod-ucws sets CLOUD_ENV to "azure" + case "aws", "ucws": + return "i3.xlarge" + case "azure": + return "Standard_DS4_v2" + case "gcp", "gcp-ucws": + return "n1-standard-4" + case "": + return "local-fake-node" + default: + return "unknown-cloudEnv-" + cloudEnv + } +} diff --git a/acceptance/bundle/integration_whl/base/output.txt b/acceptance/bundle/integration_whl/base/output.txt index a6aadac83..d42daeaf3 100644 --- a/acceptance/bundle/integration_whl/base/output.txt +++ b/acceptance/bundle/integration_whl/base/output.txt @@ -1,13 +1,45 @@ + +>>> cat input.json { "project_name": "my_test_code", "spark_version": "13.3.x-snapshot-scala2.12", - "node_type_id": "", + "node_type_id": "[NODE_TYPE_ID]", "unique_id": "[UNIQUE_NAME]", "python_wheel_wrapper": false, "instance_pool_id": "[TEST_INSTANCE_POOL_ID]" } ✨ Successfully initialized template +>>> cat databricks.yml +bundle: + name: wheel-task + +workspace: + root_path: "~/.bundle/[UNIQUE_NAME]" + + + +resources: + jobs: + some_other_job: + name: "[${bundle.target}] Test Wheel Job [UNIQUE_NAME]" + tasks: + - task_key: TestTask + new_cluster: + num_workers: 1 + spark_version: "13.3.x-snapshot-scala2.12" + node_type_id: "[NODE_TYPE_ID]" + data_security_mode: USER_ISOLATION + instance_pool_id: "[TEST_INSTANCE_POOL_ID]" + python_wheel_task: + package_name: my_test_code + entry_point: run + parameters: + - "one" + - "two" + libraries: + - 
whl: ./dist/*.whl + >>> [CLI] bundle deploy Building python_artifact... Uploading my_test_code-0.0.1-py3-none-any.whl... diff --git a/acceptance/bundle/integration_whl/base/script b/acceptance/bundle/integration_whl/base/script index 06c3bffdc..19418f5d4 100644 --- a/acceptance/bundle/integration_whl/base/script +++ b/acceptance/bundle/integration_whl/base/script @@ -1,8 +1,9 @@ export SPARK_VERSION=$DEFAULT_SPARK_VERSION export PYTHON_WHEEL_WRAPPER=false envsubst < input.json.tmpl > input.json -cat input.json +trace cat input.json $CLI bundle init . --config-file input.json +trace cat databricks.yml trap "errcode trace '$CLI' bundle destroy --auto-approve" EXIT trace $CLI bundle deploy trace $CLI bundle run some_other_job diff --git a/acceptance/bundle/integration_whl/custom_params/output.txt b/acceptance/bundle/integration_whl/custom_params/output.txt index f4715eab7..19c1d87ce 100644 --- a/acceptance/bundle/integration_whl/custom_params/output.txt +++ b/acceptance/bundle/integration_whl/custom_params/output.txt @@ -1,13 +1,45 @@ + +>>> cat input.json { "project_name": "my_test_code", "spark_version": "13.3.x-snapshot-scala2.12", - "node_type_id": "", + "node_type_id": "[NODE_TYPE_ID]", "unique_id": "[UNIQUE_NAME]", "python_wheel_wrapper": false, "instance_pool_id": "[TEST_INSTANCE_POOL_ID]" } ✨ Successfully initialized template +>>> cat databricks.yml +bundle: + name: wheel-task + +workspace: + root_path: "~/.bundle/[UNIQUE_NAME]" + + + +resources: + jobs: + some_other_job: + name: "[${bundle.target}] Test Wheel Job [UNIQUE_NAME]" + tasks: + - task_key: TestTask + new_cluster: + num_workers: 1 + spark_version: "13.3.x-snapshot-scala2.12" + node_type_id: "[NODE_TYPE_ID]" + data_security_mode: USER_ISOLATION + instance_pool_id: "[TEST_INSTANCE_POOL_ID]" + python_wheel_task: + package_name: my_test_code + entry_point: run + parameters: + - "one" + - "two" + libraries: + - whl: ./dist/*.whl + >>> [CLI] bundle deploy Building python_artifact... 
Uploading my_test_code-0.0.1-py3-none-any.whl... diff --git a/acceptance/bundle/integration_whl/custom_params/script b/acceptance/bundle/integration_whl/custom_params/script index 3abb7aafa..da7ba68f8 100644 --- a/acceptance/bundle/integration_whl/custom_params/script +++ b/acceptance/bundle/integration_whl/custom_params/script @@ -1,8 +1,9 @@ export SPARK_VERSION=$DEFAULT_SPARK_VERSION export PYTHON_WHEEL_WRAPPER=false envsubst < $TESTDIR/../base/input.json.tmpl > input.json -cat input.json +trace cat input.json $CLI bundle init $TESTDIR/../base --config-file input.json +trace cat databricks.yml trap "errcode trace '$CLI' bundle destroy --auto-approve" EXIT trace $CLI bundle deploy trace $CLI bundle run some_other_job --python-params param1,param2 diff --git a/acceptance/bundle/integration_whl/interactive_cluster/output.txt b/acceptance/bundle/integration_whl/interactive_cluster/output.txt index a6aadac83..77b99ace2 100644 --- a/acceptance/bundle/integration_whl/interactive_cluster/output.txt +++ b/acceptance/bundle/integration_whl/interactive_cluster/output.txt @@ -1,13 +1,46 @@ + +>>> cat input.json { "project_name": "my_test_code", "spark_version": "13.3.x-snapshot-scala2.12", - "node_type_id": "", + "node_type_id": "[NODE_TYPE_ID]", "unique_id": "[UNIQUE_NAME]", "python_wheel_wrapper": false, "instance_pool_id": "[TEST_INSTANCE_POOL_ID]" } ✨ Successfully initialized template +>>> cat databricks.yml +bundle: + name: wheel-task + +workspace: + root_path: "~/.bundle/[UNIQUE_NAME]" + +resources: + clusters: + test_cluster: + cluster_name: "test-cluster-[UNIQUE_NAME]" + spark_version: "13.3.x-snapshot-scala2.12" + node_type_id: "[NODE_TYPE_ID]" + num_workers: 1 + data_security_mode: USER_ISOLATION + + jobs: + some_other_job: + name: "[${bundle.target}] Test Wheel Job [UNIQUE_NAME]" + tasks: + - task_key: TestTask + existing_cluster_id: "${resources.clusters.test_cluster.cluster_id}" + python_wheel_task: + package_name: my_test_code + entry_point: run + parameters: 
+ - "one" + - "two" + libraries: + - whl: ./dist/*.whl + >>> [CLI] bundle deploy Building python_artifact... Uploading my_test_code-0.0.1-py3-none-any.whl... @@ -27,6 +60,7 @@ Got arguments: >>> [CLI] bundle destroy --auto-approve The following resources will be deleted: + delete cluster test_cluster delete job some_other_job All files and directories at the following location will be deleted: /Workspace/Users/[USERNAME]/.bundle/[UNIQUE_NAME] diff --git a/acceptance/bundle/integration_whl/interactive_cluster/script b/acceptance/bundle/integration_whl/interactive_cluster/script index 7e10d6299..65b4ee2d2 100644 --- a/acceptance/bundle/integration_whl/interactive_cluster/script +++ b/acceptance/bundle/integration_whl/interactive_cluster/script @@ -1,8 +1,9 @@ export SPARK_VERSION=$DEFAULT_SPARK_VERSION export PYTHON_WHEEL_WRAPPER=false envsubst < $TESTDIR/../base/input.json.tmpl > input.json -cat input.json -$CLI bundle init $TESTDIR/../base --config-file input.json +trace cat input.json +$CLI bundle init . 
--config-file input.json +trace cat databricks.yml trap "errcode trace '$CLI' bundle destroy --auto-approve" EXIT trace $CLI bundle deploy trace $CLI bundle run some_other_job diff --git a/acceptance/bundle/integration_whl/wrapper/output.txt b/acceptance/bundle/integration_whl/wrapper/output.txt index ca1819f3c..06e97bb27 100644 --- a/acceptance/bundle/integration_whl/wrapper/output.txt +++ b/acceptance/bundle/integration_whl/wrapper/output.txt @@ -1,13 +1,48 @@ + +>>> cat input.json { "project_name": "my_test_code", "spark_version": "12.2.x-scala2.12", - "node_type_id": "", + "node_type_id": "[NODE_TYPE_ID]", "unique_id": "[UNIQUE_NAME]", "python_wheel_wrapper": true, "instance_pool_id": "[TEST_INSTANCE_POOL_ID]" } ✨ Successfully initialized template +>>> cat databricks.yml +bundle: + name: wheel-task + +workspace: + root_path: "~/.bundle/[UNIQUE_NAME]" + + +experimental: + python_wheel_wrapper: true + + +resources: + jobs: + some_other_job: + name: "[${bundle.target}] Test Wheel Job [UNIQUE_NAME]" + tasks: + - task_key: TestTask + new_cluster: + num_workers: 1 + spark_version: "12.2.x-scala2.12" + node_type_id: "[NODE_TYPE_ID]" + data_security_mode: USER_ISOLATION + instance_pool_id: "[TEST_INSTANCE_POOL_ID]" + python_wheel_task: + package_name: my_test_code + entry_point: run + parameters: + - "one" + - "two" + libraries: + - whl: ./dist/*.whl + >>> [CLI] bundle deploy Building python_artifact... Uploading my_test_code-0.0.1-py3-none-any.whl... 
diff --git a/acceptance/bundle/integration_whl/wrapper/script b/acceptance/bundle/integration_whl/wrapper/script index ee57bc783..3e6afcfaf 100644 --- a/acceptance/bundle/integration_whl/wrapper/script +++ b/acceptance/bundle/integration_whl/wrapper/script @@ -5,8 +5,9 @@ export SPARK_VERSION=12.2.x-scala2.12 export PYTHON_WHEEL_WRAPPER=true envsubst < $TESTDIR/../base/input.json.tmpl > input.json -cat input.json +trace cat input.json $CLI bundle init $TESTDIR/../base --config-file input.json +trace cat databricks.yml trap "errcode trace '$CLI' bundle destroy --auto-approve" EXIT trace $CLI bundle deploy trace $CLI bundle run some_other_job diff --git a/acceptance/bundle/integration_whl/wrapper_custom_params/output.txt b/acceptance/bundle/integration_whl/wrapper_custom_params/output.txt index b5f97d5bc..c17ba0f8b 100644 --- a/acceptance/bundle/integration_whl/wrapper_custom_params/output.txt +++ b/acceptance/bundle/integration_whl/wrapper_custom_params/output.txt @@ -1,13 +1,48 @@ + +>>> cat input.json { "project_name": "my_test_code", "spark_version": "12.2.x-scala2.12", - "node_type_id": "", + "node_type_id": "[NODE_TYPE_ID]", "unique_id": "[UNIQUE_NAME]", "python_wheel_wrapper": true, "instance_pool_id": "[TEST_INSTANCE_POOL_ID]" } ✨ Successfully initialized template +>>> cat databricks.yml +bundle: + name: wheel-task + +workspace: + root_path: "~/.bundle/[UNIQUE_NAME]" + + +experimental: + python_wheel_wrapper: true + + +resources: + jobs: + some_other_job: + name: "[${bundle.target}] Test Wheel Job [UNIQUE_NAME]" + tasks: + - task_key: TestTask + new_cluster: + num_workers: 1 + spark_version: "12.2.x-scala2.12" + node_type_id: "[NODE_TYPE_ID]" + data_security_mode: USER_ISOLATION + instance_pool_id: "[TEST_INSTANCE_POOL_ID]" + python_wheel_task: + package_name: my_test_code + entry_point: run + parameters: + - "one" + - "two" + libraries: + - whl: ./dist/*.whl + >>> [CLI] bundle deploy Building python_artifact... 
Uploading my_test_code-0.0.1-py3-none-any.whl... diff --git a/acceptance/bundle/integration_whl/wrapper_custom_params/script b/acceptance/bundle/integration_whl/wrapper_custom_params/script index c92f7162d..c92be51fa 100644 --- a/acceptance/bundle/integration_whl/wrapper_custom_params/script +++ b/acceptance/bundle/integration_whl/wrapper_custom_params/script @@ -1,8 +1,9 @@ export SPARK_VERSION=12.2.x-scala2.12 export PYTHON_WHEEL_WRAPPER=true envsubst < $TESTDIR/../base/input.json.tmpl > input.json -cat input.json +trace cat input.json $CLI bundle init $TESTDIR/../base --config-file input.json +trace cat databricks.yml trap "errcode trace '$CLI' bundle destroy --auto-approve" EXIT trace $CLI bundle deploy trace $CLI bundle run some_other_job --python-params param1,param2 diff --git a/acceptance/bundle/override/test.toml b/acceptance/bundle/override/test.toml new file mode 100644 index 000000000..18b1a8841 --- /dev/null +++ b/acceptance/bundle/override/test.toml @@ -0,0 +1 @@ +Cloud = false