acc: Add test for stale wheel problem (#2477)

This test records the current behavior of wheel refresh on Databricks
clusters.

See #2427 for background on the "stale wheel" problem.
This commit is contained in:
Denis Bilenko 2025-03-14 13:24:18 +01:00 committed by GitHub
parent afa253c431
commit 164b6d404d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 154 additions and 1 deletions

18
acceptance/bin/update_file.py Executable file
View File

@ -0,0 +1,18 @@
#!/usr/bin/env python3
"""
Usage: update_file.py FILENAME OLD NEW

Replace all occurrences of the string OLD with NEW in FILENAME.
If OLD is not found in FILENAME, the script exits with an error.
"""

import sys


def main(argv):
    """Replace every occurrence of OLD with NEW in FILENAME, in place.

    argv: list of exactly three strings, [FILENAME, OLD, NEW].

    Exits through sys.exit() with a message when the argument count is
    wrong or when OLD does not occur in the file; the file is left
    untouched in both cases.
    """
    if len(argv) != 3:
        # Print the usage text instead of failing with an unpacking traceback.
        sys.exit((__doc__ or "Usage: update_file.py FILENAME OLD NEW").strip())

    filename, old, new = argv

    # Context manager so the read handle is closed before the file is reopened
    # for writing.
    with open(filename) as fobj:
        data = fobj.read()

    # Test for presence directly: comparing the text before and after the
    # replacement would wrongly report "not found" when OLD equals NEW.
    if old not in data:
        sys.exit(f"{old=} not found in {filename=}\n{data}")

    with open(filename, "w") as fobj:
        fobj.write(data.replace(old, new))


if __name__ == "__main__":
    main(sys.argv[1:])

View File

@ -47,6 +47,24 @@ Hello from my func
Got arguments: Got arguments:
['my_test_code', 'one', 'two'] ['my_test_code', 'one', 'two']
=== Make a change to code without version change and run the job again
>>> [CLI] bundle deploy
Building python_artifact...
Uploading my_test_code-0.0.1-py3-none-any.whl...
Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/[UNIQUE_NAME]/files...
Deploying resources...
Updating deployment state...
Deployment complete!
>>> [CLI] bundle run some_other_job
Run URL: [DATABRICKS_URL]/?o=[NUMID]#job/[NUMID]/run/[NUMID]
[TIMESTAMP] "[default] Test Wheel Job [UNIQUE_NAME]" RUNNING
[TIMESTAMP] "[default] Test Wheel Job [UNIQUE_NAME]" TERMINATED SUCCESS
UPDATED MY FUNC
Got arguments:
['my_test_code', 'one', 'two']
>>> [CLI] bundle destroy --auto-approve >>> [CLI] bundle destroy --auto-approve
The following resources will be deleted: The following resources will be deleted:
delete job some_other_job delete job some_other_job

View File

@ -4,3 +4,8 @@ trace cat databricks.yml
trap "errcode trace '$CLI' bundle destroy --auto-approve" EXIT trap "errcode trace '$CLI' bundle destroy --auto-approve" EXIT
trace $CLI bundle deploy trace $CLI bundle deploy
trace $CLI bundle run some_other_job trace $CLI bundle run some_other_job
title "Make a change to code without version change and run the job again"
update_file.py my_test_code/__main__.py 'Hello from my func' 'UPDATED MY FUNC'
trace $CLI bundle deploy
trace $CLI bundle run some_other_job

View File

@ -11,7 +11,7 @@ resources:
spark_version: "$DEFAULT_SPARK_VERSION" spark_version: "$DEFAULT_SPARK_VERSION"
node_type_id: "$NODE_TYPE_ID" node_type_id: "$NODE_TYPE_ID"
num_workers: 1 num_workers: 1
data_security_mode: USER_ISOLATION data_security_mode: $DATA_SECURITY_MODE
jobs: jobs:
some_other_job: some_other_job:

View File

@ -47,6 +47,24 @@ Hello from my func
Got arguments: Got arguments:
['my_test_code', 'one', 'two'] ['my_test_code', 'one', 'two']
=== Make a change to code without version change and run the job again
>>> [CLI] bundle deploy
Building python_artifact...
Uploading my_test_code-0.0.1-py3-none-any.whl...
Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/[UNIQUE_NAME]/files...
Deploying resources...
Updating deployment state...
Deployment complete!
>>> [CLI] bundle run some_other_job
Run URL: [DATABRICKS_URL]/?o=[NUMID]#job/[NUMID]/run/[NUMID]
[TIMESTAMP] "[default] Test Wheel Job [UNIQUE_NAME]" RUNNING
[TIMESTAMP] "[default] Test Wheel Job [UNIQUE_NAME]" TERMINATED SUCCESS
Hello from my func
Got arguments:
['my_test_code', 'one', 'two']
>>> [CLI] bundle destroy --auto-approve >>> [CLI] bundle destroy --auto-approve
The following resources will be deleted: The following resources will be deleted:
delete cluster test_cluster delete cluster test_cluster

View File

@ -1,5 +1,11 @@
export DATA_SECURITY_MODE=USER_ISOLATION
envsubst < databricks.yml.tmpl > databricks.yml envsubst < databricks.yml.tmpl > databricks.yml
trace cat databricks.yml trace cat databricks.yml
trap "errcode trace '$CLI' bundle destroy --auto-approve" EXIT trap "errcode trace '$CLI' bundle destroy --auto-approve" EXIT
trace $CLI bundle deploy trace $CLI bundle deploy
trace $CLI bundle run some_other_job trace $CLI bundle run some_other_job
title "Make a change to code without version change and run the job again"
update_file.py my_test_code/__main__.py 'Hello from my func' 'UPDATED MY FUNC'
trace $CLI bundle deploy
trace $CLI bundle run some_other_job

View File

@ -0,0 +1,76 @@
>>> cat databricks.yml
bundle:
name: wheel-task
workspace:
root_path: "~/.bundle/[UNIQUE_NAME]"
resources:
clusters:
test_cluster:
cluster_name: "test-cluster-[UNIQUE_NAME]"
spark_version: "13.3.x-snapshot-scala2.12"
node_type_id: "[NODE_TYPE_ID]"
num_workers: 1
data_security_mode: SINGLE_USER
jobs:
some_other_job:
name: "[${bundle.target}] Test Wheel Job [UNIQUE_NAME]"
tasks:
- task_key: TestTask
existing_cluster_id: "${resources.clusters.test_cluster.cluster_id}"
python_wheel_task:
package_name: my_test_code
entry_point: run
parameters:
- "one"
- "two"
libraries:
- whl: ./dist/*.whl
>>> [CLI] bundle deploy
Building python_artifact...
Uploading my_test_code-0.0.1-py3-none-any.whl...
Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/[UNIQUE_NAME]/files...
Deploying resources...
Updating deployment state...
Deployment complete!
>>> [CLI] bundle run some_other_job
Run URL: [DATABRICKS_URL]/?o=[NUMID]#job/[NUMID]/run/[NUMID]
[TIMESTAMP] "[default] Test Wheel Job [UNIQUE_NAME]" RUNNING
[TIMESTAMP] "[default] Test Wheel Job [UNIQUE_NAME]" TERMINATED SUCCESS
Hello from my func
Got arguments:
['my_test_code', 'one', 'two']
=== Make a change to code without version change and run the job again
>>> [CLI] bundle deploy
Building python_artifact...
Uploading my_test_code-0.0.1-py3-none-any.whl...
Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/[UNIQUE_NAME]/files...
Deploying resources...
Updating deployment state...
Deployment complete!
>>> [CLI] bundle run some_other_job
Run URL: [DATABRICKS_URL]/?o=[NUMID]#job/[NUMID]/run/[NUMID]
[TIMESTAMP] "[default] Test Wheel Job [UNIQUE_NAME]" RUNNING
[TIMESTAMP] "[default] Test Wheel Job [UNIQUE_NAME]" TERMINATED SUCCESS
UPDATED MY FUNC
Got arguments:
['my_test_code', 'one', 'two']
>>> [CLI] bundle destroy --auto-approve
The following resources will be deleted:
delete cluster test_cluster
delete job some_other_job
All files and directories at the following location will be deleted: /Workspace/Users/[USERNAME]/.bundle/[UNIQUE_NAME]
Deleting files...
Destroy complete!

View File

@ -0,0 +1,12 @@
# Acceptance script: deploy and run a wheel job on an interactive cluster with
# data_security_mode set to SINGLE_USER, then change the code without bumping
# the package version and deploy/run again.
#
# The bundle template, setup.py and my_test_code package are taken from the
# sibling ../interactive_cluster test directory.
# trace, errcode and title are helpers defined outside this script.
export DATA_SECURITY_MODE=SINGLE_USER

# Render the shared template with the exported variables substituted.
# Expansions are quoted so a path containing spaces or glob characters is
# passed through as a single word.
envsubst < "$TESTDIR/../interactive_cluster/databricks.yml.tmpl" > databricks.yml
trace cat databricks.yml
cp -r "$TESTDIR"/../interactive_cluster/{setup.py,my_test_code} .

# Tear the bundle down on every exit path. $CLI is expanded when the trap is
# set; the single quotes keep it one word when the trap command runs.
trap "errcode trace '$CLI' bundle destroy --auto-approve" EXIT

trace "$CLI" bundle deploy
trace "$CLI" bundle run some_other_job

title "Make a change to code without version change and run the job again"
# update_file.py fails if the old string is missing, so a drifted fixture is
# caught here rather than silently producing an unchanged wheel.
update_file.py my_test_code/__main__.py 'Hello from my func' 'UPDATED MY FUNC'
trace "$CLI" bundle deploy
trace "$CLI" bundle run some_other_job