Convert python_wheel_test.go to acceptance test (#2471)

Convert integration/bundle/python_wheel_test.go to acceptance tests. I plan
to expand these tests to check patchwheel functionality.

Inside each test there were two runs, with params and without. I've split
each run into a separate test to reduce total time, since these runs can be
done in parallel (see the illustrative sketch below).
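
For context, the acceptance runner treats each directory as one test case and runs them as parallel subtests. Below is a minimal standalone sketch of that pattern, not the actual harness; the glob path and the missing script-execution step are assumptions for illustration only.

package acceptance_test

import (
	"path/filepath"
	"testing"
)

// Illustrative only: the real runner (acceptance_test.go) does much more setup
// (CLI build, env vars, output replacements). The point is that each directory
// containing a "script" file becomes its own parallel subtest.
func TestParallelDirsSketch(t *testing.T) {
	scripts, err := filepath.Glob("bundle/python_wheel/*/script") // assumed layout
	if err != nil {
		t.Fatal(err)
	}
	for _, script := range scripts {
		dir := filepath.Dir(script)
		t.Run(dir, func(t *testing.T) {
			t.Parallel() // "with params" and "without params" now live in separate dirs and run concurrently
			// ... execute the script and diff its output against output.txt ...
		})
	}
}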

Also add a new env var, DEFAULT_SPARK_VERSION, that matches the default used
in the integration tests.

The tests are currently enabled on every PR (`CloudSlow = true` is commented
out); this can be changed after landing.
Denis Bilenko 2025-03-12 11:51:31 +01:00 committed by GitHub
parent 55387a6014
commit 8b51eeb57a
24 changed files with 250 additions and 64 deletions

@@ -170,6 +170,9 @@ func testAccept(t *testing.T, InprocessMode bool, singleTest string) int {
	repls.Repls = append(repls.Repls, testdiff.Replacement{Old: regexp.MustCompile("dbapi[0-9a-f]+"), New: "[DATABRICKS_TOKEN]"})

	// Matches defaultSparkVersion in ../integration/bundle/helpers_test.go
	t.Setenv("DEFAULT_SPARK_VERSION", "13.3.x-snapshot-scala2.12")

	testDirs := getTests(t)
	require.NotEmpty(t, testDirs)

@@ -0,0 +1,8 @@
{
"project_name": "my_test_code",
"spark_version": "$SPARK_VERSION",
"node_type_id": "$NODE_TYPE_ID",
"unique_id": "$UNIQUE_NAME",
"python_wheel_wrapper": $PYTHON_WHEEL_WRAPPER,
"instance_pool_id": "$TEST_INSTANCE_POOL_ID"
}

@@ -0,0 +1,35 @@
{
"project_name": "my_test_code",
"spark_version": "13.3.x-snapshot-scala2.12",
"node_type_id": "",
"unique_id": "[UNIQUE_NAME]",
"python_wheel_wrapper": false,
"instance_pool_id": "[TEST_INSTANCE_POOL_ID]"
}
✨ Successfully initialized template
>>> [CLI] bundle deploy
Building python_artifact...
Uploading my_test_code-0.0.1-py3-none-any.whl...
Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/[UNIQUE_NAME]/files...
Deploying resources...
Updating deployment state...
Deployment complete!
>>> [CLI] bundle run some_other_job
Run URL: [DATABRICKS_URL]/?o=[NUMID]#job/[NUMID]/run/[NUMID]
[TIMESTAMP] "[default] Test Wheel Job [UNIQUE_NAME]" RUNNING
[TIMESTAMP] "[default] Test Wheel Job [UNIQUE_NAME]" TERMINATED SUCCESS
Hello from my func
Got arguments:
['my_test_code', 'one', 'two']
>>> [CLI] bundle destroy --auto-approve
The following resources will be deleted:
delete job some_other_job
All files and directories at the following location will be deleted: /Workspace/Users/[USERNAME]/.bundle/[UNIQUE_NAME]
Deleting files...
Destroy complete!

@@ -0,0 +1,8 @@
export SPARK_VERSION=$DEFAULT_SPARK_VERSION
export PYTHON_WHEEL_WRAPPER=false
envsubst < input.json.tmpl > input.json
cat input.json
$CLI bundle init . --config-file input.json
trap "errcode trace '$CLI' bundle destroy --auto-approve" EXIT
trace $CLI bundle deploy
trace $CLI bundle run some_other_job

@@ -0,0 +1,35 @@
{
"project_name": "my_test_code",
"spark_version": "13.3.x-snapshot-scala2.12",
"node_type_id": "",
"unique_id": "[UNIQUE_NAME]",
"python_wheel_wrapper": false,
"instance_pool_id": "[TEST_INSTANCE_POOL_ID]"
}
✨ Successfully initialized template
>>> [CLI] bundle deploy
Building python_artifact...
Uploading my_test_code-0.0.1-py3-none-any.whl...
Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/[UNIQUE_NAME]/files...
Deploying resources...
Updating deployment state...
Deployment complete!
>>> [CLI] bundle run some_other_job --python-params param1,param2
Run URL: [DATABRICKS_URL]/?o=[NUMID]#job/[NUMID]/run/[NUMID]
[TIMESTAMP] "[default] Test Wheel Job [UNIQUE_NAME]" RUNNING
[TIMESTAMP] "[default] Test Wheel Job [UNIQUE_NAME]" TERMINATED SUCCESS
Hello from my func
Got arguments:
['my_test_code', 'param1', 'param2']
>>> [CLI] bundle destroy --auto-approve
The following resources will be deleted:
delete job some_other_job
All files and directories at the following location will be deleted: /Workspace/Users/[USERNAME]/.bundle/[UNIQUE_NAME]
Deleting files...
Destroy complete!

@@ -0,0 +1,8 @@
export SPARK_VERSION=$DEFAULT_SPARK_VERSION
export PYTHON_WHEEL_WRAPPER=false
envsubst < $TESTDIR/../base/input.json.tmpl > input.json
cat input.json
$CLI bundle init $TESTDIR/../base --config-file input.json
trap "errcode trace '$CLI' bundle destroy --auto-approve" EXIT
trace $CLI bundle deploy
trace $CLI bundle run some_other_job --python-params param1,param2

@@ -0,0 +1,35 @@
{
"project_name": "my_test_code",
"spark_version": "13.3.x-snapshot-scala2.12",
"node_type_id": "",
"unique_id": "[UNIQUE_NAME]",
"python_wheel_wrapper": false,
"instance_pool_id": "[TEST_INSTANCE_POOL_ID]"
}
✨ Successfully initialized template
>>> [CLI] bundle deploy
Building python_artifact...
Uploading my_test_code-0.0.1-py3-none-any.whl...
Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/[UNIQUE_NAME]/files...
Deploying resources...
Updating deployment state...
Deployment complete!
>>> [CLI] bundle run some_other_job
Run URL: [DATABRICKS_URL]/?o=[NUMID]#job/[NUMID]/run/[NUMID]
[TIMESTAMP] "[default] Test Wheel Job [UNIQUE_NAME]" RUNNING
[TIMESTAMP] "[default] Test Wheel Job [UNIQUE_NAME]" TERMINATED SUCCESS
Hello from my func
Got arguments:
['my_test_code', 'one', 'two']
>>> [CLI] bundle destroy --auto-approve
The following resources will be deleted:
delete job some_other_job
All files and directories at the following location will be deleted: /Workspace/Users/[USERNAME]/.bundle/[UNIQUE_NAME]
Deleting files...
Destroy complete!

@@ -0,0 +1,8 @@
export SPARK_VERSION=$DEFAULT_SPARK_VERSION
export PYTHON_WHEEL_WRAPPER=false
envsubst < $TESTDIR/../base/input.json.tmpl > input.json
cat input.json
$CLI bundle init $TESTDIR/../base --config-file input.json
trap "errcode trace '$CLI' bundle destroy --auto-approve" EXIT
trace $CLI bundle deploy
trace $CLI bundle run some_other_job

@@ -0,0 +1,20 @@
Local = false
#CloudSlow = true
Ignore = [
".databricks",
"build",
"dist",
"my_test_code",
"my_test_code.egg-info",
"setup.py",
"input.json",
"databricks.yml",
]

[[Repls]]
Old = '2\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d'
New = "[TIMESTAMP]"

[[Repls]]
Old = '\d{5,}'
New = "[NUMID]"
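
The two Repls above normalize nondeterministic output before it is diffed against output.txt. Here is a minimal standalone sketch of the substitution they describe; the real harness applies them via testdiff.Replacement, and this helper exists only for illustration.

package main

import (
	"fmt"
	"regexp"
)

// Same patterns as the Repls above: timestamps become [TIMESTAMP],
// numeric IDs of five or more digits become [NUMID].
var (
	timestampRE = regexp.MustCompile(`2\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d`)
	numericIDRE = regexp.MustCompile(`\d{5,}`)
)

func normalize(line string) string {
	line = timestampRE.ReplaceAllString(line, "[TIMESTAMP]")
	return numericIDRE.ReplaceAllString(line, "[NUMID]")
}

func main() {
	fmt.Println(normalize(`2025-03-12 11:51:31 "[default] Test Wheel Job" RUNNING, run 123456789`))
	// prints: [TIMESTAMP] "[default] Test Wheel Job" RUNNING, run [NUMID]
}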

@@ -0,0 +1,35 @@
{
"project_name": "my_test_code",
"spark_version": "12.2.x-scala2.12",
"node_type_id": "",
"unique_id": "[UNIQUE_NAME]",
"python_wheel_wrapper": true,
"instance_pool_id": "[TEST_INSTANCE_POOL_ID]"
}
✨ Successfully initialized template
>>> [CLI] bundle deploy
Building python_artifact...
Uploading my_test_code-0.0.1-py3-none-any.whl...
Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/[UNIQUE_NAME]/files...
Deploying resources...
Updating deployment state...
Deployment complete!
>>> [CLI] bundle run some_other_job
Run URL: [DATABRICKS_URL]/?o=[NUMID]#job/[NUMID]/run/[NUMID]
[TIMESTAMP] "[default] Test Wheel Job [UNIQUE_NAME]" RUNNING
[TIMESTAMP] "[default] Test Wheel Job [UNIQUE_NAME]" TERMINATED SUCCESS
Hello from my func
Got arguments:
['my_test_code', 'one', 'two']
>>> [CLI] bundle destroy --auto-approve
The following resources will be deleted:
delete job some_other_job
All files and directories at the following location will be deleted: /Workspace/Users/[USERNAME]/.bundle/[UNIQUE_NAME]
Deleting files...
Destroy complete!

@@ -0,0 +1,12 @@
# Installing wheels from the Workspace file system is only supported on DBR 13.1+.
# But users on older DBRs have been running python wheel tasks by installing the wheel from DBFS.
# We still want to support older DBRs, which is why the trampoline workaround exists (https://github.com/databricks/cli/pull/635).
# Hence this test verifies that python wheel tasks in DABs keep working on older DBRs.
export SPARK_VERSION=12.2.x-scala2.12
export PYTHON_WHEEL_WRAPPER=true
envsubst < $TESTDIR/../base/input.json.tmpl > input.json
cat input.json
$CLI bundle init $TESTDIR/../base --config-file input.json
trap "errcode trace '$CLI' bundle destroy --auto-approve" EXIT
trace $CLI bundle deploy
trace $CLI bundle run some_other_job

@@ -0,0 +1,35 @@
{
"project_name": "my_test_code",
"spark_version": "12.2.x-scala2.12",
"node_type_id": "",
"unique_id": "[UNIQUE_NAME]",
"python_wheel_wrapper": true,
"instance_pool_id": "[TEST_INSTANCE_POOL_ID]"
}
✨ Successfully initialized template
>>> [CLI] bundle deploy
Building python_artifact...
Uploading my_test_code-0.0.1-py3-none-any.whl...
Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/[UNIQUE_NAME]/files...
Deploying resources...
Updating deployment state...
Deployment complete!
>>> [CLI] bundle run some_other_job --python-params param1,param2
Run URL: [DATABRICKS_URL]/?o=[NUMID]#job/[NUMID]/run/[NUMID]
[TIMESTAMP] "[default] Test Wheel Job [UNIQUE_NAME]" RUNNING
[TIMESTAMP] "[default] Test Wheel Job [UNIQUE_NAME]" TERMINATED SUCCESS
Hello from my func
Got arguments:
['my_test_code', 'param1', 'param2']
>>> [CLI] bundle destroy --auto-approve
The following resources will be deleted:
delete job some_other_job
All files and directories at the following location will be deleted: /Workspace/Users/[USERNAME]/.bundle/[UNIQUE_NAME]
Deleting files...
Destroy complete!

@@ -0,0 +1,8 @@
export SPARK_VERSION=12.2.x-scala2.12
export PYTHON_WHEEL_WRAPPER=true
envsubst < $TESTDIR/../base/input.json.tmpl > input.json
cat input.json
$CLI bundle init $TESTDIR/../base --config-file input.json
trap "errcode trace '$CLI' bundle destroy --auto-approve" EXIT
trace $CLI bundle deploy
trace $CLI bundle run some_other_job --python-params param1,param2

@@ -1,64 +0,0 @@
package bundle_test

import (
	"testing"

	"github.com/databricks/cli/integration/internal/acc"
	"github.com/databricks/cli/internal/testutil"
	"github.com/databricks/cli/libs/env"
	"github.com/google/uuid"
	"github.com/stretchr/testify/require"
)

func runPythonWheelTest(t *testing.T, templateName, sparkVersion string, pythonWheelWrapper bool) {
	ctx, _ := acc.WorkspaceTest(t)

	nodeTypeId := testutil.GetCloud(t).NodeTypeID()
	instancePoolId := env.Get(ctx, "TEST_INSTANCE_POOL_ID")
	bundleRoot := initTestTemplate(t, ctx, templateName, map[string]any{
		"node_type_id":         nodeTypeId,
		"unique_id":            uuid.New().String(),
		"spark_version":        sparkVersion,
		"python_wheel_wrapper": pythonWheelWrapper,
		"instance_pool_id":     instancePoolId,
	})

	deployBundle(t, ctx, bundleRoot)

	t.Cleanup(func() {
		destroyBundle(t, ctx, bundleRoot)
	})

	if testing.Short() {
		t.Log("Skip the job run in short mode")
		return
	}

	out, err := runResource(t, ctx, bundleRoot, "some_other_job")
	require.NoError(t, err)
	require.Contains(t, out, "Hello from my func")
	require.Contains(t, out, "Got arguments:")
	require.Contains(t, out, "['my_test_code', 'one', 'two']")

	out, err = runResourceWithParams(t, ctx, bundleRoot, "some_other_job", "--python-params=param1,param2")
	require.NoError(t, err)
	require.Contains(t, out, "Hello from my func")
	require.Contains(t, out, "Got arguments:")
	require.Contains(t, out, "['my_test_code', 'param1', 'param2']")
}

func TestPythonWheelTaskDeployAndRunWithoutWrapper(t *testing.T) {
	runPythonWheelTest(t, "python_wheel_task", "13.3.x-snapshot-scala2.12", false)
}

func TestPythonWheelTaskDeployAndRunWithWrapper(t *testing.T) {
	runPythonWheelTest(t, "python_wheel_task", "12.2.x-scala2.12", true)
}

func TestPythonWheelTaskDeployAndRunOnInteractiveCluster(t *testing.T) {
	if testutil.GetCloud(t) == testutil.AWS {
		t.Skip("Skipping test for AWS cloud because it is not permitted to create clusters")
	}
	runPythonWheelTest(t, "python_wheel_task_with_cluster", defaultSparkVersion, false)
}