# Run Spark JAR task test on multiple DBR versions (#1665)

## Changes

This change runs the Spark JAR task integration test on multiple DBR versions, to explore the error messages emitted on older DBRs and the differences between UC and non-UC clusters.

## Tests

Integration tests pass.
Author: Pieter Noordhuis, 2024-08-09 17:13:31 +02:00 (committed by GitHub)
Commit: a240be0b5a (parent: 65f4aad87c)
4 changed files with 120 additions and 20 deletions

### `spark_jar_task` template: `databricks.yml.tmpl`

```diff
@@ -3,7 +3,6 @@ bundle:
 
 workspace:
   root_path: "~/.bundle/{{.unique_id}}"
-  artifact_path: {{.artifact_path}}
 
 artifacts:
   my_java_code:
@@ -27,3 +26,30 @@ resources:
       main_class_name: PrintArgs
       libraries:
         - jar: ./{{.project_name}}/PrintArgs.jar
+
+targets:
+  volume:
+    # Override the artifact path to upload artifacts to a volume path
+    workspace:
+      artifact_path: {{.artifact_path}}
+
+    resources:
+      jobs:
+        jar_job:
+          tasks:
+            - task_key: TestSparkJarTask
+              new_cluster:
+                # Force cluster to run in single user mode (force it to be a UC cluster)
+                data_security_mode: SINGLE_USER
+
+  workspace:
+    resources:
+      jobs:
+        jar_job:
+          tasks:
+            - task_key: TestSparkJarTask
+              new_cluster:
+                # Force cluster to run in no isolation mode (force it to be a non-UC cluster)
+                data_security_mode: NONE
```
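
The two targets differ only in `data_security_mode`: `SINGLE_USER` forces a UC-enabled cluster, while `NONE` forces a no-isolation, non-UC cluster. For orientation, here is a minimal sketch of the same distinction expressed with the `databricks-sdk-go` types the CLI builds on; this snippet is illustrative only and not part of the PR:

```go
package main

import (
	"fmt"

	"github.com/databricks/databricks-sdk-go/service/compute"
)

func main() {
	// UC cluster: single-user data security mode (the "volume" target above).
	uc := compute.ClusterSpec{
		SparkVersion:     "13.3.x-scala2.12",
		DataSecurityMode: compute.DataSecurityModeSingleUser,
	}

	// Non-UC cluster: no isolation (the "workspace" target above).
	nonUc := compute.ClusterSpec{
		SparkVersion:     "14.3.x-scala2.12",
		DataSecurityMode: compute.DataSecurityModeNone,
	}

	fmt.Println(uc.DataSecurityMode, nonUc.DataSecurityMode)
}
```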

### `internal/bundle/helpers.go`

```diff
@@ -12,6 +12,7 @@ import (
 	"github.com/databricks/cli/cmd/root"
 	"github.com/databricks/cli/internal"
 	"github.com/databricks/cli/libs/cmdio"
+	"github.com/databricks/cli/libs/env"
 	"github.com/databricks/cli/libs/flags"
 	"github.com/databricks/cli/libs/template"
 	"github.com/databricks/databricks-sdk-go"
@@ -56,21 +57,21 @@ func writeConfigFile(t *testing.T, config map[string]any) (string, error) {
 }
 
 func validateBundle(t *testing.T, ctx context.Context, path string) ([]byte, error) {
-	t.Setenv("BUNDLE_ROOT", path)
+	ctx = env.Set(ctx, "BUNDLE_ROOT", path)
 	c := internal.NewCobraTestRunnerWithContext(t, ctx, "bundle", "validate", "--output", "json")
 	stdout, _, err := c.Run()
 	return stdout.Bytes(), err
 }
 
 func deployBundle(t *testing.T, ctx context.Context, path string) error {
-	t.Setenv("BUNDLE_ROOT", path)
+	ctx = env.Set(ctx, "BUNDLE_ROOT", path)
 	c := internal.NewCobraTestRunnerWithContext(t, ctx, "bundle", "deploy", "--force-lock", "--auto-approve")
 	_, _, err := c.Run()
 	return err
 }
 
 func deployBundleWithFlags(t *testing.T, ctx context.Context, path string, flags []string) error {
-	t.Setenv("BUNDLE_ROOT", path)
+	ctx = env.Set(ctx, "BUNDLE_ROOT", path)
 	args := []string{"bundle", "deploy", "--force-lock"}
 	args = append(args, flags...)
 	c := internal.NewCobraTestRunnerWithContext(t, ctx, args...)
@@ -79,6 +80,7 @@ func deployBundleWithFlags(t *testing.T, ctx context.Context, path string, flags []string) error {
 }
 
 func runResource(t *testing.T, ctx context.Context, path string, key string) (string, error) {
+	ctx = env.Set(ctx, "BUNDLE_ROOT", path)
 	ctx = cmdio.NewContext(ctx, cmdio.Default())
 
 	c := internal.NewCobraTestRunnerWithContext(t, ctx, "bundle", "run", key)
@@ -87,6 +89,7 @@ func runResource(t *testing.T, ctx context.Context, path string, key string) (string, error) {
 }
 
 func runResourceWithParams(t *testing.T, ctx context.Context, path string, key string, params ...string) (string, error) {
+	ctx = env.Set(ctx, "BUNDLE_ROOT", path)
 	ctx = cmdio.NewContext(ctx, cmdio.Default())
 
 	args := make([]string, 0)
@@ -98,7 +101,7 @@ func runResourceWithParams(t *testing.T, ctx context.Context, path string, key string, params ...string) (string, error) {
 }
 
 func destroyBundle(t *testing.T, ctx context.Context, path string) error {
-	t.Setenv("BUNDLE_ROOT", path)
+	ctx = env.Set(ctx, "BUNDLE_ROOT", path)
 	c := internal.NewCobraTestRunnerWithContext(t, ctx, "bundle", "destroy", "--auto-approve")
 	_, _, err := c.Run()
 	return err
```
### `internal/bundle/spark_jar_test.go`

```diff
@@ -1,28 +1,19 @@
 package bundle
 
 import (
-	"os"
+	"context"
 	"testing"
 
 	"github.com/databricks/cli/internal"
 	"github.com/databricks/cli/internal/acc"
+	"github.com/databricks/cli/internal/testutil"
 	"github.com/databricks/cli/libs/env"
 	"github.com/google/uuid"
 	"github.com/stretchr/testify/require"
 )
 
-func runSparkJarTest(t *testing.T, sparkVersion string) {
+func runSparkJarTestCommon(t *testing.T, ctx context.Context, sparkVersion string, artifactPath string) {
 	cloudEnv := internal.GetEnvOrSkipTest(t, "CLOUD_ENV")
-	t.Log(cloudEnv)
-
-	if os.Getenv("TEST_METASTORE_ID") == "" {
-		t.Skip("Skipping tests that require a UC Volume when metastore id is not set.")
-	}
-
-	ctx, wt := acc.WorkspaceTest(t)
-	w := wt.W
-	volumePath := internal.TemporaryUcVolume(t, w)
-
 	nodeTypeId := internal.GetNodeTypeId(cloudEnv)
 	tmpDir := t.TempDir()
 	instancePoolId := env.Get(ctx, "TEST_INSTANCE_POOL_ID")
@@ -31,7 +22,7 @@ func runSparkJarTest(t *testing.T, sparkVersion string) {
 		"unique_id":        uuid.New().String(),
 		"spark_version":    sparkVersion,
 		"root":             tmpDir,
-		"artifact_path":    volumePath,
+		"artifact_path":    artifactPath,
 		"instance_pool_id": instancePoolId,
 	}, tmpDir)
 	require.NoError(t, err)
@@ -48,6 +39,62 @@ func runSparkJarTest(t *testing.T, sparkVersion string) {
 	require.Contains(t, out, "Hello from Jar!")
 }
 
-func TestAccSparkJarTaskDeployAndRunOnVolumes(t *testing.T) {
-	runSparkJarTest(t, "14.3.x-scala2.12")
+func runSparkJarTestFromVolume(t *testing.T, sparkVersion string) {
+	ctx, wt := acc.UcWorkspaceTest(t)
+	volumePath := internal.TemporaryUcVolume(t, wt.W)
+	ctx = env.Set(ctx, "DATABRICKS_BUNDLE_TARGET", "volume")
+	runSparkJarTestCommon(t, ctx, sparkVersion, volumePath)
+}
+
+func runSparkJarTestFromWorkspace(t *testing.T, sparkVersion string) {
+	ctx, _ := acc.WorkspaceTest(t)
+	ctx = env.Set(ctx, "DATABRICKS_BUNDLE_TARGET", "workspace")
+	runSparkJarTestCommon(t, ctx, sparkVersion, "n/a")
+}
+
+func TestAccSparkJarTaskDeployAndRunOnVolumes(t *testing.T) {
+	internal.GetEnvOrSkipTest(t, "CLOUD_ENV")
+	testutil.RequireJDK(t, context.Background(), "1.8.0")
+
+	// Failure on earlier DBR versions:
+	//
+	//   JAR installation from Volumes is supported on UC Clusters with DBR >= 13.3.
+	//   Denied library is Jar(/Volumes/main/test-schema-ldgaklhcahlg/my-volume/.internal/PrintArgs.jar)
+	//
+	versions := []string{
+		"13.3.x-scala2.12", // 13.3 LTS (includes Apache Spark 3.4.1, Scala 2.12)
+		"14.3.x-scala2.12", // 14.3 LTS (includes Apache Spark 3.5.0, Scala 2.12)
+		"15.4.x-scala2.12", // 15.4 LTS Beta (includes Apache Spark 3.5.0, Scala 2.12)
+	}
+
+	for _, version := range versions {
+		t.Run(version, func(t *testing.T) {
+			t.Parallel()
+			runSparkJarTestFromVolume(t, version)
+		})
+	}
+}
+
+func TestAccSparkJarTaskDeployAndRunOnWorkspace(t *testing.T) {
+	internal.GetEnvOrSkipTest(t, "CLOUD_ENV")
+	testutil.RequireJDK(t, context.Background(), "1.8.0")
+
+	// Failure on earlier DBR versions:
+	//
+	//   Library from /Workspace is not allowed on this cluster.
+	//   Please switch to using DBR 14.1+ No Isolation Shared or DBR 13.1+ Shared cluster or 13.2+ Assigned cluster to use /Workspace libraries.
+	//
+	versions := []string{
+		"14.3.x-scala2.12", // 14.3 LTS (includes Apache Spark 3.5.0, Scala 2.12)
+		"15.4.x-scala2.12", // 15.4 LTS Beta (includes Apache Spark 3.5.0, Scala 2.12)
+	}
+
+	for _, version := range versions {
+		t.Run(version, func(t *testing.T) {
+			t.Parallel()
+			runSparkJarTestFromWorkspace(t, version)
+		})
+	}
 }
```
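
One note on the fan-out: because each subtest calls `t.Parallel()`, the closure passed to `t.Run` executes after the `range` loop has finished. Under Go 1.22+ loop-variable semantics each iteration gets its own `version`, so the code above is safe; on older toolchains the idiom needs an explicit per-iteration copy. A runnable sketch of that classic pattern (names are illustrative):

```go
package bundle_test

import "testing"

func TestVersionsFanOut(t *testing.T) {
	versions := []string{"13.3.x-scala2.12", "14.3.x-scala2.12"}
	for _, version := range versions {
		version := version // pre-Go 1.22: capture a per-iteration copy
		t.Run(version, func(t *testing.T) {
			t.Parallel()
			t.Log(version) // each parallel subtest sees its own version
		})
	}
}
```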

### `internal/testutil/jdk.go` (new file, 24 lines)

```diff
@@ -0,0 +1,24 @@
+package testutil
+
+import (
+	"bytes"
+	"context"
+	"os/exec"
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+func RequireJDK(t *testing.T, ctx context.Context, version string) {
+	var stderr bytes.Buffer
+	cmd := exec.Command("javac", "-version")
+	cmd.Stderr = &stderr
+	err := cmd.Run()
+	require.NoError(t, err, "Unable to run javac -version")
+
+	// Get the first line of the output
+	line := strings.Split(stderr.String(), "\n")[0]
+	require.Contains(t, line, version, "Expected JDK version %s, got %s", version, line)
+}
```