From a240be0b5a7eed8d0746ba0f167df0dd465f06c1 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Fri, 9 Aug 2024 17:13:31 +0200 Subject: [PATCH] Run Spark JAR task test on multiple DBR versions (#1665) ## Changes This explores error messages on older DBRs and UC vs non-UC. ## Tests Integration tests pass. --- .../template/databricks.yml.tmpl | 28 ++++++- internal/bundle/helpers.go | 11 ++- internal/bundle/spark_jar_test.go | 77 +++++++++++++++---- internal/testutil/jdk.go | 24 ++++++ 4 files changed, 120 insertions(+), 20 deletions(-) create mode 100644 internal/testutil/jdk.go diff --git a/internal/bundle/bundles/spark_jar_task/template/databricks.yml.tmpl b/internal/bundle/bundles/spark_jar_task/template/databricks.yml.tmpl index 8c9331fe..db451cd9 100644 --- a/internal/bundle/bundles/spark_jar_task/template/databricks.yml.tmpl +++ b/internal/bundle/bundles/spark_jar_task/template/databricks.yml.tmpl @@ -3,7 +3,6 @@ bundle: workspace: root_path: "~/.bundle/{{.unique_id}}" - artifact_path: {{.artifact_path}} artifacts: my_java_code: @@ -27,3 +26,30 @@ resources: main_class_name: PrintArgs libraries: - jar: ./{{.project_name}}/PrintArgs.jar + +targets: + volume: + # Override the artifact path to upload artifacts to a volume path + workspace: + artifact_path: {{.artifact_path}} + + resources: + jobs: + jar_job: + tasks: + - task_key: TestSparkJarTask + new_cluster: + + # Force cluster to run in single user mode (force it to be a UC cluster) + data_security_mode: SINGLE_USER + + workspace: + resources: + jobs: + jar_job: + tasks: + - task_key: TestSparkJarTask + new_cluster: + + # Force cluster to run in no isolation mode (force it to be a non-UC cluster) + data_security_mode: NONE diff --git a/internal/bundle/helpers.go b/internal/bundle/helpers.go index 1910a014..03d9cff7 100644 --- a/internal/bundle/helpers.go +++ b/internal/bundle/helpers.go @@ -12,6 +12,7 @@ import ( "github.com/databricks/cli/cmd/root" "github.com/databricks/cli/internal" "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/env" "github.com/databricks/cli/libs/flags" "github.com/databricks/cli/libs/template" "github.com/databricks/databricks-sdk-go" @@ -56,21 +57,21 @@ func writeConfigFile(t *testing.T, config map[string]any) (string, error) { } func validateBundle(t *testing.T, ctx context.Context, path string) ([]byte, error) { - t.Setenv("BUNDLE_ROOT", path) + ctx = env.Set(ctx, "BUNDLE_ROOT", path) c := internal.NewCobraTestRunnerWithContext(t, ctx, "bundle", "validate", "--output", "json") stdout, _, err := c.Run() return stdout.Bytes(), err } func deployBundle(t *testing.T, ctx context.Context, path string) error { - t.Setenv("BUNDLE_ROOT", path) + ctx = env.Set(ctx, "BUNDLE_ROOT", path) c := internal.NewCobraTestRunnerWithContext(t, ctx, "bundle", "deploy", "--force-lock", "--auto-approve") _, _, err := c.Run() return err } func deployBundleWithFlags(t *testing.T, ctx context.Context, path string, flags []string) error { - t.Setenv("BUNDLE_ROOT", path) + ctx = env.Set(ctx, "BUNDLE_ROOT", path) args := []string{"bundle", "deploy", "--force-lock"} args = append(args, flags...) c := internal.NewCobraTestRunnerWithContext(t, ctx, args...) @@ -79,6 +80,7 @@ func deployBundleWithFlags(t *testing.T, ctx context.Context, path string, flags } func runResource(t *testing.T, ctx context.Context, path string, key string) (string, error) { + ctx = env.Set(ctx, "BUNDLE_ROOT", path) ctx = cmdio.NewContext(ctx, cmdio.Default()) c := internal.NewCobraTestRunnerWithContext(t, ctx, "bundle", "run", key) @@ -87,6 +89,7 @@ func runResource(t *testing.T, ctx context.Context, path string, key string) (st } func runResourceWithParams(t *testing.T, ctx context.Context, path string, key string, params ...string) (string, error) { + ctx = env.Set(ctx, "BUNDLE_ROOT", path) ctx = cmdio.NewContext(ctx, cmdio.Default()) args := make([]string, 0) @@ -98,7 +101,7 @@ func runResourceWithParams(t *testing.T, ctx context.Context, path string, key s } func destroyBundle(t *testing.T, ctx context.Context, path string) error { - t.Setenv("BUNDLE_ROOT", path) + ctx = env.Set(ctx, "BUNDLE_ROOT", path) c := internal.NewCobraTestRunnerWithContext(t, ctx, "bundle", "destroy", "--auto-approve") _, _, err := c.Run() return err diff --git a/internal/bundle/spark_jar_test.go b/internal/bundle/spark_jar_test.go index 98bfa4a9..4b469617 100644 --- a/internal/bundle/spark_jar_test.go +++ b/internal/bundle/spark_jar_test.go @@ -1,28 +1,19 @@ package bundle import ( - "os" + "context" "testing" "github.com/databricks/cli/internal" "github.com/databricks/cli/internal/acc" + "github.com/databricks/cli/internal/testutil" "github.com/databricks/cli/libs/env" "github.com/google/uuid" "github.com/stretchr/testify/require" ) -func runSparkJarTest(t *testing.T, sparkVersion string) { +func runSparkJarTestCommon(t *testing.T, ctx context.Context, sparkVersion string, artifactPath string) { cloudEnv := internal.GetEnvOrSkipTest(t, "CLOUD_ENV") - t.Log(cloudEnv) - - if os.Getenv("TEST_METASTORE_ID") == "" { - t.Skip("Skipping tests that require a UC Volume when metastore id is not set.") - } - - ctx, wt := acc.WorkspaceTest(t) - w := wt.W - volumePath := internal.TemporaryUcVolume(t, w) - nodeTypeId := internal.GetNodeTypeId(cloudEnv) tmpDir := t.TempDir() instancePoolId := env.Get(ctx, "TEST_INSTANCE_POOL_ID") @@ -31,7 +22,7 @@ func runSparkJarTest(t *testing.T, sparkVersion string) { "unique_id": uuid.New().String(), "spark_version": sparkVersion, "root": tmpDir, - "artifact_path": volumePath, + "artifact_path": artifactPath, "instance_pool_id": instancePoolId, }, tmpDir) require.NoError(t, err) @@ -48,6 +39,62 @@ func runSparkJarTest(t *testing.T, sparkVersion string) { require.Contains(t, out, "Hello from Jar!") } -func TestAccSparkJarTaskDeployAndRunOnVolumes(t *testing.T) { - runSparkJarTest(t, "14.3.x-scala2.12") +func runSparkJarTestFromVolume(t *testing.T, sparkVersion string) { + ctx, wt := acc.UcWorkspaceTest(t) + volumePath := internal.TemporaryUcVolume(t, wt.W) + ctx = env.Set(ctx, "DATABRICKS_BUNDLE_TARGET", "volume") + runSparkJarTestCommon(t, ctx, sparkVersion, volumePath) +} + +func runSparkJarTestFromWorkspace(t *testing.T, sparkVersion string) { + ctx, _ := acc.WorkspaceTest(t) + ctx = env.Set(ctx, "DATABRICKS_BUNDLE_TARGET", "workspace") + runSparkJarTestCommon(t, ctx, sparkVersion, "n/a") +} + +func TestAccSparkJarTaskDeployAndRunOnVolumes(t *testing.T) { + internal.GetEnvOrSkipTest(t, "CLOUD_ENV") + testutil.RequireJDK(t, context.Background(), "1.8.0") + + // Failure on earlier DBR versions: + // + // JAR installation from Volumes is supported on UC Clusters with DBR >= 13.3. + // Denied library is Jar(/Volumes/main/test-schema-ldgaklhcahlg/my-volume/.internal/PrintArgs.jar) + // + + versions := []string{ + "13.3.x-scala2.12", // 13.3 LTS (includes Apache Spark 3.4.1, Scala 2.12) + "14.3.x-scala2.12", // 14.3 LTS (includes Apache Spark 3.5.0, Scala 2.12) + "15.4.x-scala2.12", // 15.4 LTS Beta (includes Apache Spark 3.5.0, Scala 2.12) + } + + for _, version := range versions { + t.Run(version, func(t *testing.T) { + t.Parallel() + runSparkJarTestFromVolume(t, version) + }) + } +} + +func TestAccSparkJarTaskDeployAndRunOnWorkspace(t *testing.T) { + internal.GetEnvOrSkipTest(t, "CLOUD_ENV") + testutil.RequireJDK(t, context.Background(), "1.8.0") + + // Failure on earlier DBR versions: + // + // Library from /Workspace is not allowed on this cluster. + // Please switch to using DBR 14.1+ No Isolation Shared or DBR 13.1+ Shared cluster or 13.2+ Assigned cluster to use /Workspace libraries. + // + + versions := []string{ + "14.3.x-scala2.12", // 14.3 LTS (includes Apache Spark 3.5.0, Scala 2.12) + "15.4.x-scala2.12", // 15.4 LTS Beta (includes Apache Spark 3.5.0, Scala 2.12) + } + + for _, version := range versions { + t.Run(version, func(t *testing.T) { + t.Parallel() + runSparkJarTestFromWorkspace(t, version) + }) + } } diff --git a/internal/testutil/jdk.go b/internal/testutil/jdk.go new file mode 100644 index 00000000..05bd7d6d --- /dev/null +++ b/internal/testutil/jdk.go @@ -0,0 +1,24 @@ +package testutil + +import ( + "bytes" + "context" + "os/exec" + "strings" + "testing" + + "github.com/stretchr/testify/require" +) + +func RequireJDK(t *testing.T, ctx context.Context, version string) { + var stderr bytes.Buffer + + cmd := exec.Command("javac", "-version") + cmd.Stderr = &stderr + err := cmd.Run() + require.NoError(t, err, "Unable to run javac -version") + + // Get the first line of the output + line := strings.Split(stderr.String(), "\n")[0] + require.Contains(t, line, version, "Expected JDK version %s, got %s", version, line) +}