Compare commits

..

2 Commits

Author SHA1 Message Date
Andrew Nester 71cf426755
Added E2E test to run Python wheels on interactive cluster created in bundle (#1864)
## Changes
Added an E2E test that runs Python wheels on an interactive cluster created
in a bundle.

We had a gap in testing wheels on all-purpose clusters; this PR closes
that gap.
2024-11-01 14:22:47 +00:00
shreyas-goenka f3bf33da27
Add `cmd-exec-id` to user agent (#1808)
## Changes

This PR adds the `cmd-exec-id` field to the user agent. This allows us
to correlate multiple HTTP requests made from the CLI.

### Why Not Use HTTP traceparent?
We considered using the traceparent header in HTTP as an alternative,
but it's not a good fit for our use case. Here's why:
1. Purpose of traceparent: It's designed to trace a single HTTP request
across a distributed system as it moves through subsystems and proxies.
2. Our requirement: We need to trace multiple HTTP requests made during
a single command execution in the CLI.

For more details about how traceparent itself works and how it's used in
the Go SDK, see
https://github.com/databricks/databricks-sdk-go/pull/914.

## Tests
Unit test
2024-11-01 14:08:09 +00:00
10 changed files with 151 additions and 5 deletions

View File

@ -75,6 +75,7 @@ func New(ctx context.Context) *cobra.Command {
// Configure our user agent with the command that's about to be executed. // Configure our user agent with the command that's about to be executed.
ctx = withCommandInUserAgent(ctx, cmd) ctx = withCommandInUserAgent(ctx, cmd)
ctx = withCommandExecIdInUserAgent(ctx)
ctx = withUpstreamInUserAgent(ctx) ctx = withUpstreamInUserAgent(ctx)
cmd.SetContext(ctx) cmd.SetContext(ctx)
return nil return nil

View File

@ -0,0 +1,14 @@
package root
import (
"context"
"github.com/databricks/databricks-sdk-go/useragent"
"github.com/google/uuid"
)
func withCommandExecIdInUserAgent(ctx context.Context) context.Context {
// A UUID that will allow us to correlate multiple API requests made by
// the same CLI invocation.
return useragent.InContext(ctx, "cmd-exec-id", uuid.New().String())
}

View File

@ -0,0 +1,26 @@
package root
import (
"context"
"regexp"
"testing"
"github.com/databricks/databricks-sdk-go/useragent"
"github.com/google/uuid"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestWithCommandExecIdInUserAgent verifies that withCommandExecIdInUserAgent
// puts exactly one cmd-exec-id entry into the user agent string and that the
// entry's value parses as a UUID.
func TestWithCommandExecIdInUserAgent(t *testing.T) {
	ctx := withCommandExecIdInUserAgent(context.Background())
	userAgent := useragent.FromContext(ctx)

	// Capture whatever follows the "cmd-exec-id/" key in the user agent.
	pattern := regexp.MustCompile(`cmd-exec-id/([a-f0-9-]+)`)
	found := pattern.FindAllStringSubmatch(userAgent, -1)

	// Abort unless there is exactly one entry; the indexing below relies on it.
	require.Len(t, found, 1)

	// The captured group has to be a well-formed UUID.
	_, err := uuid.Parse(found[0][1])
	assert.NoError(t, err)
}

View File

@ -1,13 +1,15 @@
package root package root
import ( import (
"context"
"testing" "testing"
"github.com/databricks/databricks-sdk-go/useragent"
"github.com/spf13/cobra" "github.com/spf13/cobra"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
) )
func TestCommandString(t *testing.T) { func TestWithCommandInUserAgent(t *testing.T) {
root := &cobra.Command{ root := &cobra.Command{
Use: "root", Use: "root",
} }
@ -26,4 +28,9 @@ func TestCommandString(t *testing.T) {
assert.Equal(t, "root", commandString(root)) assert.Equal(t, "root", commandString(root))
assert.Equal(t, "hello", commandString(hello)) assert.Equal(t, "hello", commandString(hello))
assert.Equal(t, "hello_world", commandString(world)) assert.Equal(t, "hello_world", commandString(world))
ctx := withCommandInUserAgent(context.Background(), world)
ua := useragent.FromContext(ctx)
assert.Contains(t, ua, "cmd/hello_world")
} }

View File

@ -0,0 +1,25 @@
{
"properties": {
"project_name": {
"type": "string",
"default": "my_test_code",
"description": "Unique name for this project"
},
"spark_version": {
"type": "string",
"description": "Spark version used for job cluster"
},
"node_type_id": {
"type": "string",
"description": "Node type id for job cluster"
},
"unique_id": {
"type": "string",
"description": "Unique ID for job name"
},
"instance_pool_id": {
"type": "string",
"description": "Instance pool id for job cluster"
}
}
}

View File

@ -0,0 +1,29 @@
bundle:
name: wheel-task
workspace:
root_path: "~/.bundle/{{.unique_id}}"
resources:
clusters:
test_cluster:
cluster_name: "test-cluster-{{.unique_id}}"
spark_version: "{{.spark_version}}"
node_type_id: "{{.node_type_id}}"
num_workers: 1
data_security_mode: USER_ISOLATION
jobs:
some_other_job:
name: "[${bundle.target}] Test Wheel Job {{.unique_id}}"
tasks:
- task_key: TestTask
existing_cluster_id: "${resources.clusters.test_cluster.cluster_id}"
python_wheel_task:
package_name: my_test_code
entry_point: run
parameters:
- "one"
- "two"
libraries:
- whl: ./dist/*.whl

View File

@ -0,0 +1,15 @@
# Packaging script for the example wheel. The project_name placeholders in
# this file are template fields; presumably they are filled in when the
# template is rendered (TODO confirm against the template loader).
from setuptools import setup, find_packages
# The package itself is imported so that its __version__ and __author__
# attributes can be passed to setup() below.
import {{.project_name}}
setup(
name="{{.project_name}}",
version={{.project_name}}.__version__,
author={{.project_name}}.__author__,
url="https://databricks.com",
author_email="john.doe@databricks.com",
description="my example wheel",
# Only the project package is included in the built distribution.
packages=find_packages(include=["{{.project_name}}"]),
# Declares an entry point named "run" that points at main() in the
# package's __main__ module.
entry_points={"group1": "run={{.project_name}}.__main__:main"},
install_requires=["setuptools"],
)

View File

@ -0,0 +1,2 @@
__version__ = "0.0.1"
__author__ = "Databricks"

View File

@ -0,0 +1,16 @@
"""
The entry point of the Python Wheel
"""

import sys


def main():
    """Print a greeting followed by the arguments the process was started with.

    Writes three lines to standard output: a fixed greeting, a fixed header,
    and the repr of ``sys.argv``. Returns ``None``.
    """
    print("Hello from my func")
    print("Got arguments:")
    print(sys.argv)


# Run main() only when the module is executed as a script, not on import.
if __name__ == "__main__":
    main()

View File

@ -5,17 +5,18 @@ import (
"github.com/databricks/cli/internal" "github.com/databricks/cli/internal"
"github.com/databricks/cli/internal/acc" "github.com/databricks/cli/internal/acc"
"github.com/databricks/cli/internal/testutil"
"github.com/databricks/cli/libs/env" "github.com/databricks/cli/libs/env"
"github.com/google/uuid" "github.com/google/uuid"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
) )
func runPythonWheelTest(t *testing.T, sparkVersion string, pythonWheelWrapper bool) { func runPythonWheelTest(t *testing.T, templateName string, sparkVersion string, pythonWheelWrapper bool) {
ctx, _ := acc.WorkspaceTest(t) ctx, _ := acc.WorkspaceTest(t)
nodeTypeId := internal.GetNodeTypeId(env.Get(ctx, "CLOUD_ENV")) nodeTypeId := internal.GetNodeTypeId(env.Get(ctx, "CLOUD_ENV"))
instancePoolId := env.Get(ctx, "TEST_INSTANCE_POOL_ID") instancePoolId := env.Get(ctx, "TEST_INSTANCE_POOL_ID")
bundleRoot, err := initTestTemplate(t, ctx, "python_wheel_task", map[string]any{ bundleRoot, err := initTestTemplate(t, ctx, templateName, map[string]any{
"node_type_id": nodeTypeId, "node_type_id": nodeTypeId,
"unique_id": uuid.New().String(), "unique_id": uuid.New().String(),
"spark_version": sparkVersion, "spark_version": sparkVersion,
@ -45,9 +46,19 @@ func runPythonWheelTest(t *testing.T, sparkVersion string, pythonWheelWrapper bo
} }
func TestAccPythonWheelTaskDeployAndRunWithoutWrapper(t *testing.T) { func TestAccPythonWheelTaskDeployAndRunWithoutWrapper(t *testing.T) {
runPythonWheelTest(t, "13.3.x-snapshot-scala2.12", false) runPythonWheelTest(t, "python_wheel_task", "13.3.x-snapshot-scala2.12", false)
} }
func TestAccPythonWheelTaskDeployAndRunWithWrapper(t *testing.T) { func TestAccPythonWheelTaskDeployAndRunWithWrapper(t *testing.T) {
runPythonWheelTest(t, "12.2.x-scala2.12", true) runPythonWheelTest(t, "python_wheel_task", "12.2.x-scala2.12", true)
}
func TestAccPythonWheelTaskDeployAndRunOnInteractiveCluster(t *testing.T) {
_, wt := acc.WorkspaceTest(t)
if testutil.IsAWSCloud(wt.T) {
t.Skip("Skipping test for AWS cloud because it is not permitted to create clusters")
}
runPythonWheelTest(t, "python_wheel_task_with_cluster", defaultSparkVersion, false)
} }