acc: Fix interactive_cluster test to use correct config; add NODE_TYPE_ID (#2478)

This is a follow-up to #2471, where an incorrect config was used in the
interactive_cluster test.

## Changes
- Fixed interactive_cluster to use its own config; it was accidentally
referring to the config from ../base.
- Added the $NODE_TYPE_ID env var and a matching output replacement to the
acceptance tests; this is necessary for the interactive_cluster test (see
the sketch below).
- Disabled acceptance/bundle/override on cloud. It started failing because
it contains a real node type that gets replaced with [NODE_TYPE_ID], but
only in the AWS env. Since the test is focused on config merging, there is
no need to run it against real workspaces (a gating sketch follows the
final file below).
- Modified all tests in integration_whl to print the rendered
databricks.yml, to make this kind of error visible.
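For context, a minimal sketch of how the env var and the replacement work
together: the harness exports the real node type via NODE_TYPE_ID, and any
occurrence of it in recorded output is rewritten to the stable placeholder
`[NODE_TYPE_ID]`, so the same golden files work across clouds. Only
`NODE_TYPE_ID`, `repls.Set`, and the placeholder come from this change;
`normalizeOutput` and the test below are illustrative, not the harness's
actual API.

```go
package acceptance_test

import (
	"os"
	"strings"
	"testing"
)

// Illustrative only: the real harness keeps replacements in a repls table
// (see repls.Set in the diff below) rather than using a helper like this.
func normalizeOutput(out string) string {
	if nodeTypeID := os.Getenv("NODE_TYPE_ID"); nodeTypeID != "" {
		out = strings.ReplaceAll(out, nodeTypeID, "[NODE_TYPE_ID]")
	}
	return out
}

func TestNormalizeOutput(t *testing.T) {
	t.Setenv("NODE_TYPE_ID", "i3.xlarge")
	got := normalizeOutput(`"node_type_id": "i3.xlarge"`)
	want := `"node_type_id": "[NODE_TYPE_ID]"`
	if got != want {
		t.Errorf("got %q, want %q", got, want)
	}
}
```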
Denis Bilenko, 2025-03-12 16:49:24 +01:00, commit 1e1cfa90a9 (parent 06b71fe5da)
12 changed files with 208 additions and 11 deletions


@@ -173,6 +173,10 @@ func testAccept(t *testing.T, InprocessMode bool, singleTest string) int {
// Matches defaultSparkVersion in ../integration/bundle/helpers_test.go
t.Setenv("DEFAULT_SPARK_VERSION", "13.3.x-snapshot-scala2.12")
nodeTypeID := getNodeTypeID(cloudEnv)
t.Setenv("NODE_TYPE_ID", nodeTypeID)
repls.Set(nodeTypeID, "[NODE_TYPE_ID]")
testDirs := getTests(t)
require.NotEmpty(t, testDirs)
@@ -799,3 +803,22 @@ func runWithLog(t *testing.T, cmd *exec.Cmd, out *os.File, tail bool) error {
return <-processErrCh
}
func getNodeTypeID(cloudEnv string) string {
switch cloudEnv {
// no idea why, but
// aws-prod-ucws sets CLOUD_ENV to "ucws"
// gcp-prod-ucws sets CLOUD_ENV to "gcp-ucws"
// azure-prod-ucws sets CLOUD_ENV to "azure"
case "aws", "ucws":
return "i3.xlarge"
case "azure":
return "Standard_DS4_v2"
case "gcp", "gcp-ucws":
return "n1-standard-4"
case "":
return "local-fake-node"
default:
return "unknown-cloudEnv-" + cloudEnv
}
}
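A table-driven check of the mapping above could look like this. This is a
sketch, assuming it lives in the same package as getNodeTypeID; the suite
does not necessarily test this helper directly.

```go
package acceptance

import "testing"

// Covers the per-cloud mapping, including the CLOUD_ENV quirks noted in
// the comments above (e.g. aws-prod-ucws reporting "ucws").
func TestGetNodeTypeID(t *testing.T) {
	for cloudEnv, want := range map[string]string{
		"aws":      "i3.xlarge",
		"ucws":     "i3.xlarge",
		"azure":    "Standard_DS4_v2",
		"gcp":      "n1-standard-4",
		"gcp-ucws": "n1-standard-4",
		"":         "local-fake-node",
		"mycloud":  "unknown-cloudEnv-mycloud",
	} {
		if got := getNodeTypeID(cloudEnv); got != want {
			t.Errorf("getNodeTypeID(%q) = %q, want %q", cloudEnv, got, want)
		}
	}
}
```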


@@ -1,13 +1,45 @@
>>> cat input.json
{
"project_name": "my_test_code",
"spark_version": "13.3.x-snapshot-scala2.12",
"node_type_id": "",
"node_type_id": "[NODE_TYPE_ID]",
"unique_id": "[UNIQUE_NAME]",
"python_wheel_wrapper": false,
"instance_pool_id": "[TEST_INSTANCE_POOL_ID]"
}
✨ Successfully initialized template
>>> cat databricks.yml
bundle:
name: wheel-task
workspace:
root_path: "~/.bundle/[UNIQUE_NAME]"
resources:
jobs:
some_other_job:
name: "[${bundle.target}] Test Wheel Job [UNIQUE_NAME]"
tasks:
- task_key: TestTask
new_cluster:
num_workers: 1
spark_version: "13.3.x-snapshot-scala2.12"
node_type_id: "[NODE_TYPE_ID]"
data_security_mode: USER_ISOLATION
instance_pool_id: "[TEST_INSTANCE_POOL_ID]"
python_wheel_task:
package_name: my_test_code
entry_point: run
parameters:
- "one"
- "two"
libraries:
- whl: ./dist/*.whl
>>> [CLI] bundle deploy
Building python_artifact...
Uploading my_test_code-0.0.1-py3-none-any.whl...


@@ -1,8 +1,9 @@
export SPARK_VERSION=$DEFAULT_SPARK_VERSION
export PYTHON_WHEEL_WRAPPER=false
envsubst < input.json.tmpl > input.json
cat input.json
trace cat input.json
$CLI bundle init . --config-file input.json
trace cat databricks.yml
trap "errcode trace '$CLI' bundle destroy --auto-approve" EXIT
trace $CLI bundle deploy
trace $CLI bundle run some_other_job


@@ -1,13 +1,45 @@
>>> cat input.json
{
"project_name": "my_test_code",
"spark_version": "13.3.x-snapshot-scala2.12",
"node_type_id": "",
"node_type_id": "[NODE_TYPE_ID]",
"unique_id": "[UNIQUE_NAME]",
"python_wheel_wrapper": false,
"instance_pool_id": "[TEST_INSTANCE_POOL_ID]"
}
✨ Successfully initialized template
>>> cat databricks.yml
bundle:
name: wheel-task
workspace:
root_path: "~/.bundle/[UNIQUE_NAME]"
resources:
jobs:
some_other_job:
name: "[${bundle.target}] Test Wheel Job [UNIQUE_NAME]"
tasks:
- task_key: TestTask
new_cluster:
num_workers: 1
spark_version: "13.3.x-snapshot-scala2.12"
node_type_id: "[NODE_TYPE_ID]"
data_security_mode: USER_ISOLATION
instance_pool_id: "[TEST_INSTANCE_POOL_ID]"
python_wheel_task:
package_name: my_test_code
entry_point: run
parameters:
- "one"
- "two"
libraries:
- whl: ./dist/*.whl
>>> [CLI] bundle deploy
Building python_artifact...
Uploading my_test_code-0.0.1-py3-none-any.whl...


@@ -1,8 +1,9 @@
export SPARK_VERSION=$DEFAULT_SPARK_VERSION
export PYTHON_WHEEL_WRAPPER=false
envsubst < $TESTDIR/../base/input.json.tmpl > input.json
cat input.json
trace cat input.json
$CLI bundle init $TESTDIR/../base --config-file input.json
trace cat databricks.yml
trap "errcode trace '$CLI' bundle destroy --auto-approve" EXIT
trace $CLI bundle deploy
trace $CLI bundle run some_other_job --python-params param1,param2


@@ -1,13 +1,46 @@
>>> cat input.json
{
"project_name": "my_test_code",
"spark_version": "13.3.x-snapshot-scala2.12",
"node_type_id": "",
"node_type_id": "[NODE_TYPE_ID]",
"unique_id": "[UNIQUE_NAME]",
"python_wheel_wrapper": false,
"instance_pool_id": "[TEST_INSTANCE_POOL_ID]"
}
✨ Successfully initialized template
>>> cat databricks.yml
bundle:
name: wheel-task
workspace:
root_path: "~/.bundle/[UNIQUE_NAME]"
resources:
clusters:
test_cluster:
cluster_name: "test-cluster-[UNIQUE_NAME]"
spark_version: "13.3.x-snapshot-scala2.12"
node_type_id: "[NODE_TYPE_ID]"
num_workers: 1
data_security_mode: USER_ISOLATION
jobs:
some_other_job:
name: "[${bundle.target}] Test Wheel Job [UNIQUE_NAME]"
tasks:
- task_key: TestTask
existing_cluster_id: "${resources.clusters.test_cluster.cluster_id}"
python_wheel_task:
package_name: my_test_code
entry_point: run
parameters:
- "one"
- "two"
libraries:
- whl: ./dist/*.whl
>>> [CLI] bundle deploy
Building python_artifact...
Uploading my_test_code-0.0.1-py3-none-any.whl...
@@ -27,6 +60,7 @@ Got arguments:
>>> [CLI] bundle destroy --auto-approve
The following resources will be deleted:
delete cluster test_cluster
delete job some_other_job
All files and directories at the following location will be deleted: /Workspace/Users/[USERNAME]/.bundle/[UNIQUE_NAME]


@@ -1,8 +1,9 @@
export SPARK_VERSION=$DEFAULT_SPARK_VERSION
export PYTHON_WHEEL_WRAPPER=false
envsubst < $TESTDIR/../base/input.json.tmpl > input.json
cat input.json
$CLI bundle init $TESTDIR/../base --config-file input.json
trace cat input.json
$CLI bundle init . --config-file input.json
trace cat databricks.yml
trap "errcode trace '$CLI' bundle destroy --auto-approve" EXIT
trace $CLI bundle deploy
trace $CLI bundle run some_other_job


@@ -1,13 +1,48 @@
>>> cat input.json
{
"project_name": "my_test_code",
"spark_version": "12.2.x-scala2.12",
"node_type_id": "",
"node_type_id": "[NODE_TYPE_ID]",
"unique_id": "[UNIQUE_NAME]",
"python_wheel_wrapper": true,
"instance_pool_id": "[TEST_INSTANCE_POOL_ID]"
}
✨ Successfully initialized template
>>> cat databricks.yml
bundle:
name: wheel-task
workspace:
root_path: "~/.bundle/[UNIQUE_NAME]"
experimental:
python_wheel_wrapper: true
resources:
jobs:
some_other_job:
name: "[${bundle.target}] Test Wheel Job [UNIQUE_NAME]"
tasks:
- task_key: TestTask
new_cluster:
num_workers: 1
spark_version: "12.2.x-scala2.12"
node_type_id: "[NODE_TYPE_ID]"
data_security_mode: USER_ISOLATION
instance_pool_id: "[TEST_INSTANCE_POOL_ID]"
python_wheel_task:
package_name: my_test_code
entry_point: run
parameters:
- "one"
- "two"
libraries:
- whl: ./dist/*.whl
>>> [CLI] bundle deploy
Building python_artifact...
Uploading my_test_code-0.0.1-py3-none-any.whl...


@@ -5,8 +5,9 @@
export SPARK_VERSION=12.2.x-scala2.12
export PYTHON_WHEEL_WRAPPER=true
envsubst < $TESTDIR/../base/input.json.tmpl > input.json
cat input.json
trace cat input.json
$CLI bundle init $TESTDIR/../base --config-file input.json
trace cat databricks.yml
trap "errcode trace '$CLI' bundle destroy --auto-approve" EXIT
trace $CLI bundle deploy
trace $CLI bundle run some_other_job


@@ -1,13 +1,48 @@
>>> cat input.json
{
"project_name": "my_test_code",
"spark_version": "12.2.x-scala2.12",
"node_type_id": "",
"node_type_id": "[NODE_TYPE_ID]",
"unique_id": "[UNIQUE_NAME]",
"python_wheel_wrapper": true,
"instance_pool_id": "[TEST_INSTANCE_POOL_ID]"
}
✨ Successfully initialized template
>>> cat databricks.yml
bundle:
name: wheel-task
workspace:
root_path: "~/.bundle/[UNIQUE_NAME]"
experimental:
python_wheel_wrapper: true
resources:
jobs:
some_other_job:
name: "[${bundle.target}] Test Wheel Job [UNIQUE_NAME]"
tasks:
- task_key: TestTask
new_cluster:
num_workers: 1
spark_version: "12.2.x-scala2.12"
node_type_id: "[NODE_TYPE_ID]"
data_security_mode: USER_ISOLATION
instance_pool_id: "[TEST_INSTANCE_POOL_ID]"
python_wheel_task:
package_name: my_test_code
entry_point: run
parameters:
- "one"
- "two"
libraries:
- whl: ./dist/*.whl
>>> [CLI] bundle deploy
Building python_artifact...
Uploading my_test_code-0.0.1-py3-none-any.whl...


@@ -1,8 +1,9 @@
export SPARK_VERSION=12.2.x-scala2.12
export PYTHON_WHEEL_WRAPPER=true
envsubst < $TESTDIR/../base/input.json.tmpl > input.json
cat input.json
trace cat input.json
$CLI bundle init $TESTDIR/../base --config-file input.json
trace cat databricks.yml
trap "errcode trace '$CLI' bundle destroy --auto-approve" EXIT
trace $CLI bundle deploy
trace $CLI bundle run some_other_job --python-params param1,param2


@@ -0,0 +1 @@
Cloud = false
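The one-line file above is what disables the override test on cloud. Below
is a minimal sketch of how a per-test flag like this could gate cloud runs;
the config type and field names are assumptions, not the harness's actual
implementation.

```go
package acceptance

import (
	"os"
	"testing"
)

// Assumed shape of a per-test config; the real harness may differ.
type testConfig struct {
	Cloud bool // when false, the test runs only locally
}

// skipIfLocalOnly skips a test on cloud runs when its config sets
// Cloud = false, as acceptance/bundle/override now does.
func skipIfLocalOnly(t *testing.T, cfg testConfig) {
	if cloudEnv := os.Getenv("CLOUD_ENV"); cloudEnv != "" && !cfg.Cloud {
		t.Skipf("skipping on %s: test is local-only (Cloud = false)", cloudEnv)
	}
}
```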