Refactor TestSparkJarTask* tests to support test environments without Java 8 (#2385)

## Changes
1. Refactored `TestSparkJarTaskDeployAndRunOnVolumes` and
`TestSparkJarTaskDeployAndRunOnWorkspace` to use a table-driven approach
for better organization of similar tests
2. Implemented `testutil.HasJDK()` to replace `testutil.RequireJDK` to
be able to skip tests
3. Ensured the test suite properly fails if no compatible Java version
is found

## Why
It can be tricky to have Java 8 installed on modern dev environments
(e.g. a Mac with an Apple M3 chip), and its absence previously caused the
Spark Jar task tests to fail when run locally. This refactoring allows
such environments to be able to run "SparkJar" tests using a newer
Databricks Runtime.

## Tests
1. Ran `TestSparkJarTaskDeployAndRunOnVolumes` and
`TestSparkJarTaskDeployAndRunOnWorkspace` locally on a Mac with Java 11
installed.
2. Checked that tests against older runtimes are still being run and
passing in CI/CD environments
This commit is contained in:
Anton Nekipelov 2025-02-28 09:39:21 +01:00 committed by GitHub
parent 6a07e05e9b
commit 3aef065c5c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 116 additions and 37 deletions

View File

@ -11,6 +11,50 @@ import (
"github.com/stretchr/testify/require"
)
// sparkJarTestCase describes a single Spark jar test: the Databricks runtime
// version to run against, paired with the local Java version required to
// compile a jar that runtime can execute.
type sparkJarTestCase struct {
	name                string // Test name
	runtimeVersion      string // The Spark runtime version to test
	requiredJavaVersion string // Java version that can compile jar to pass this test
}
// runSparkJarTests runs every test case as a parallel subtest, skipping cases
// whose required local Java version is not installed. If no case at all can
// run, the suite fails outright so a misconfigured environment is not
// reported as green.
// testRunner is the function that performs the actual test for one runtime version.
func runSparkJarTests(t *testing.T, testCases []sparkJarTestCase, testRunner func(t *testing.T, runtimeVersion string)) {
	t.Helper()

	// Probe the local JDK once per case, up front, before spawning subtests.
	runnable := make(map[string]bool, len(testCases))
	for _, tc := range testCases {
		runnable[tc.name] = testutil.HasJDK(t, context.Background(), tc.requiredJavaVersion)
	}

	anyRunnable := false
	for _, ok := range runnable {
		if ok {
			anyRunnable = true
			break
		}
	}
	if !anyRunnable {
		t.Fatal("At least one test is required to pass. All tests were skipped because no compatible Java version was found.")
	}

	for _, tc := range testCases {
		tc := tc // capture the range variable for the parallel subtest closure
		t.Run(tc.name, func(t *testing.T) {
			if !runnable[tc.name] {
				t.Skipf("Skipping %s: requires Java version %v", tc.name, tc.requiredJavaVersion)
				return
			}
			t.Parallel()
			testRunner(t, tc.runtimeVersion)
		})
	}
}
func runSparkJarTestCommon(t *testing.T, ctx context.Context, sparkVersion, artifactPath string) {
nodeTypeId := testutil.GetCloud(t).NodeTypeID()
tmpDir := t.TempDir()
@ -54,46 +98,60 @@ func runSparkJarTestFromWorkspace(t *testing.T, sparkVersion string) {
}
func TestSparkJarTaskDeployAndRunOnVolumes(t *testing.T) {
testutil.RequireJDK(t, context.Background(), "1.8.0")
// Failure on earlier DBR versions:
//
// JAR installation from Volumes is supported on UC Clusters with DBR >= 13.3.
// Denied library is Jar(/Volumes/main/test-schema-ldgaklhcahlg/my-volume/.internal/PrintArgs.jar)
//
versions := []string{
"13.3.x-scala2.12", // 13.3 LTS (includes Apache Spark 3.4.1, Scala 2.12)
"14.3.x-scala2.12", // 14.3 LTS (includes Apache Spark 3.5.0, Scala 2.12)
"15.4.x-scala2.12", // 15.4 LTS Beta (includes Apache Spark 3.5.0, Scala 2.12)
}
for _, version := range versions {
t.Run(version, func(t *testing.T) {
t.Parallel()
runSparkJarTestFromVolume(t, version)
})
testCases := []sparkJarTestCase{
{
name: "Databricks Runtime 13.3 LTS",
runtimeVersion: "13.3.x-scala2.12", // 13.3 LTS (includes Apache Spark 3.4.1, Scala 2.12)
requiredJavaVersion: "1.8.0", // Only JDK 8 is supported
},
{
name: "Databricks Runtime 14.3 LTS",
runtimeVersion: "14.3.x-scala2.12", // 14.3 LTS (includes Apache Spark 3.5.0, Scala 2.12)
requiredJavaVersion: "1.8.0", // Only JDK 8 is supported
},
{
name: "Databricks Runtime 15.4 LTS",
runtimeVersion: "15.4.x-scala2.12", // 15.4 LTS (includes Apache Spark 3.5.0, Scala 2.12)
requiredJavaVersion: "1.8.0", // Only JDK 8 is supported
},
{
name: "Databricks Runtime 16.2",
runtimeVersion: "16.2.x-scala2.12", // 16.2 (includes Apache Spark 3.5.2, Scala 2.12)
requiredJavaVersion: "11.0", // Can run jars compiled by Java 11
},
}
runSparkJarTests(t, testCases, runSparkJarTestFromVolume)
}
func TestSparkJarTaskDeployAndRunOnWorkspace(t *testing.T) {
testutil.RequireJDK(t, context.Background(), "1.8.0")
// Failure on earlier DBR versions:
//
// Library from /Workspace is not allowed on this cluster.
// Please switch to using DBR 14.1+ No Isolation Shared or DBR 13.1+ Shared cluster or 13.2+ Assigned cluster to use /Workspace libraries.
//
versions := []string{
"14.3.x-scala2.12", // 14.3 LTS (includes Apache Spark 3.5.0, Scala 2.12)
"15.4.x-scala2.12", // 15.4 LTS Beta (includes Apache Spark 3.5.0, Scala 2.12)
}
for _, version := range versions {
t.Run(version, func(t *testing.T) {
t.Parallel()
runSparkJarTestFromWorkspace(t, version)
})
testCases := []sparkJarTestCase{
{
name: "Databricks Runtime 14.3 LTS",
runtimeVersion: "14.3.x-scala2.12", // 14.3 LTS (includes Apache Spark 3.5.0, Scala 2.12)
requiredJavaVersion: "1.8.0", // Only JDK 8 is supported
},
{
name: "Databricks Runtime 15.4 LTS",
runtimeVersion: "15.4.x-scala2.12", // 15.4 LTS (includes Apache Spark 3.5.0, Scala 2.12)
requiredJavaVersion: "1.8.0", // Only JDK 8 is supported
},
{
name: "Databricks Runtime 16.2",
runtimeVersion: "16.2.x-scala2.12", // 16.2 (includes Apache Spark 3.5.2, Scala 2.12)
requiredJavaVersion: "11.0", // Can run jars compiled by Java 11
},
}
runSparkJarTests(t, testCases, runSparkJarTestFromWorkspace)
}

View File

@ -1,23 +1,44 @@
package testutil
import (
"bytes"
"context"
"os/exec"
"strings"
"github.com/stretchr/testify/require"
)
func RequireJDK(t TestingT, ctx context.Context, version string) {
var stderr bytes.Buffer
// HasJDK checks if the specified Java version is available in the system.
// It returns true if the required JDK version is present, false otherwise.
// This is a non-failing variant of RequireJDK.
//
// Example output of `java -version` in eclipse-temurin:8:
// openjdk version "1.8.0_442"
// OpenJDK Runtime Environment (Temurin)(build 1.8.0_442-b06)
// OpenJDK 64-Bit Server VM (Temurin)(build 25.442-b06, mixed mode)
//
// Example output of `java -version` in java11 (homebrew):
// openjdk version "11.0.26" 2025-01-21
// OpenJDK Runtime Environment Homebrew (build 11.0.26+0)
// OpenJDK 64-Bit Server VM Homebrew (build 11.0.26+0, mixed mode)
func HasJDK(t TestingT, ctx context.Context, version string) bool {
t.Helper()
cmd := exec.Command("javac", "-version")
cmd.Stderr = &stderr
err := cmd.Run()
require.NoError(t, err, "Unable to run javac -version")
// Get the first line of the output
line := strings.Split(stderr.String(), "\n")[0]
require.Contains(t, line, version, "Expected JDK version %s, got %s", version, line)
// Try to execute "java -version" command
cmd := exec.CommandContext(ctx, "java", "-version")
output, err := cmd.CombinedOutput()
if err != nil {
t.Logf("Failed to execute java -version: %v", err)
return false
}
javaVersionOutput := string(output)
// Check if the output contains the expected version
expectedVersionString := "version \"" + version
if strings.Contains(javaVersionOutput, expectedVersionString) {
t.Logf("Detected JDK version %s", version)
return true
}
t.Logf("Required JDK version %s not found, instead got: %s", version, javaVersionOutput)
return false
}