From 756e55fabceaf91669a8df682562712a3162da53 Mon Sep 17 00:00:00 2001 From: Ilya Kuznetsov Date: Wed, 20 Nov 2024 13:22:27 +0100 Subject: [PATCH] Source-linked deployments for bundles in the workspace (#1884) ## Changes This change adds a preset for source-linked deployments. It is enabled by default for targets in `development` mode **if** the Databricks CLI is running from the `/Workspace` directory on DBR. It does not have an effect when running the CLI anywhere else. Key highlights: 1. Files in this mode won't be uploaded to the workspace. 2. Created resources will use references to source files instead of their workspace copies. ## Tests 1. Apply preset unit test covering conditional logic. 2. High-level process target mode unit test for testing integration between mutators. --------- Co-authored-by: Pieter Noordhuis --- bundle/config/mutator/apply_presets.go | 10 ++ bundle/config/mutator/apply_presets_test.go | 85 +++++++++ bundle/config/mutator/process_target_mode.go | 9 + .../mutator/process_target_mode_test.go | 33 ++++ bundle/config/mutator/translate_paths.go | 10 +- bundle/config/mutator/translate_paths_test.go | 161 ++++++++++++++++++ bundle/config/presets.go | 5 + bundle/deploy/files/upload.go | 6 + bundle/trampoline/python_dbr_warning.go | 4 + 9 files changed, 321 insertions(+), 2 deletions(-) diff --git a/bundle/config/mutator/apply_presets.go b/bundle/config/mutator/apply_presets.go index 59b8547b..9cec704e 100644 --- a/bundle/config/mutator/apply_presets.go +++ b/bundle/config/mutator/apply_presets.go @@ -9,6 +9,7 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/libs/dbr" "github.com/databricks/cli/libs/diag" "github.com/databricks/cli/libs/dyn" "github.com/databricks/cli/libs/textutil" @@ -221,6 +222,15 @@ func (m *applyPresets) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnos dashboard.DisplayName = prefix + dashboard.DisplayName } + if 
config.IsExplicitlyEnabled((b.Config.Presets.SourceLinkedDeployment)) { + isDatabricksWorkspace := dbr.RunsOnRuntime(ctx) && strings.HasPrefix(b.SyncRootPath, "/Workspace/") + if !isDatabricksWorkspace { + disabled := false + b.Config.Presets.SourceLinkedDeployment = &disabled + diags = diags.Extend(diag.Warningf("source-linked deployment is available only in the Databricks Workspace")) + } + } + return diags } diff --git a/bundle/config/mutator/apply_presets_test.go b/bundle/config/mutator/apply_presets_test.go index 24295da4..f11a45d6 100644 --- a/bundle/config/mutator/apply_presets_test.go +++ b/bundle/config/mutator/apply_presets_test.go @@ -2,12 +2,14 @@ package mutator_test import ( "context" + "runtime" "testing" "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/mutator" "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/libs/dbr" "github.com/databricks/databricks-sdk-go/service/catalog" "github.com/databricks/databricks-sdk-go/service/jobs" "github.com/stretchr/testify/require" @@ -364,3 +366,86 @@ func TestApplyPresetsResourceNotDefined(t *testing.T) { }) } } + +func TestApplyPresetsSourceLinkedDeployment(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("this test is not applicable on Windows because source-linked mode works only in the Databricks Workspace") + } + + testContext := context.Background() + enabled := true + disabled := false + workspacePath := "/Workspace/user.name@company.com" + + tests := []struct { + bundlePath string + ctx context.Context + name string + initialValue *bool + expectedValue *bool + expectedWarning string + }{ + { + name: "preset enabled, bundle in Workspace, databricks runtime", + bundlePath: workspacePath, + ctx: dbr.MockRuntime(testContext, true), + initialValue: &enabled, + expectedValue: &enabled, + }, + { + name: "preset enabled, bundle not in Workspace, databricks runtime", + bundlePath: 
"/Users/user.name@company.com", + ctx: dbr.MockRuntime(testContext, true), + initialValue: &enabled, + expectedValue: &disabled, + expectedWarning: "source-linked deployment is available only in the Databricks Workspace", + }, + { + name: "preset enabled, bundle in Workspace, not databricks runtime", + bundlePath: workspacePath, + ctx: dbr.MockRuntime(testContext, false), + initialValue: &enabled, + expectedValue: &disabled, + expectedWarning: "source-linked deployment is available only in the Databricks Workspace", + }, + { + name: "preset disabled, bundle in Workspace, databricks runtime", + bundlePath: workspacePath, + ctx: dbr.MockRuntime(testContext, true), + initialValue: &disabled, + expectedValue: &disabled, + }, + { + name: "preset nil, bundle in Workspace, databricks runtime", + bundlePath: workspacePath, + ctx: dbr.MockRuntime(testContext, true), + initialValue: nil, + expectedValue: nil, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + b := &bundle.Bundle{ + SyncRootPath: tt.bundlePath, + Config: config.Root{ + Presets: config.Presets{ + SourceLinkedDeployment: tt.initialValue, + }, + }, + } + + diags := bundle.Apply(tt.ctx, b, mutator.ApplyPresets()) + if diags.HasError() { + t.Fatalf("unexpected error: %v", diags) + } + + if tt.expectedWarning != "" { + require.Equal(t, tt.expectedWarning, diags[0].Summary) + } + + require.Equal(t, tt.expectedValue, b.Config.Presets.SourceLinkedDeployment) + }) + } + +} diff --git a/bundle/config/mutator/process_target_mode.go b/bundle/config/mutator/process_target_mode.go index 44b53681..df0136fa 100644 --- a/bundle/config/mutator/process_target_mode.go +++ b/bundle/config/mutator/process_target_mode.go @@ -6,6 +6,7 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/libs/dbr" "github.com/databricks/cli/libs/diag" "github.com/databricks/cli/libs/dyn" "github.com/databricks/cli/libs/iamutil" @@ -57,6 +58,14 @@ func 
transformDevelopmentMode(ctx context.Context, b *bundle.Bundle) { t.TriggerPauseStatus = config.Paused } + if !config.IsExplicitlyDisabled(t.SourceLinkedDeployment) { + isInWorkspace := strings.HasPrefix(b.SyncRootPath, "/Workspace/") + if isInWorkspace && dbr.RunsOnRuntime(ctx) { + enabled := true + t.SourceLinkedDeployment = &enabled + } + } + if !config.IsExplicitlyDisabled(t.PipelinesDevelopment) { enabled := true t.PipelinesDevelopment = &enabled diff --git a/bundle/config/mutator/process_target_mode_test.go b/bundle/config/mutator/process_target_mode_test.go index 4135d5fd..c5ea9ade 100644 --- a/bundle/config/mutator/process_target_mode_test.go +++ b/bundle/config/mutator/process_target_mode_test.go @@ -3,14 +3,17 @@ package mutator import ( "context" "reflect" + "runtime" "strings" "testing" "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/libs/dbr" "github.com/databricks/cli/libs/diag" "github.com/databricks/cli/libs/tags" + "github.com/databricks/cli/libs/vfs" sdkconfig "github.com/databricks/databricks-sdk-go/config" "github.com/databricks/databricks-sdk-go/service/catalog" "github.com/databricks/databricks-sdk-go/service/compute" @@ -140,6 +143,7 @@ func mockBundle(mode config.Mode) *bundle.Bundle { }, }, }, + SyncRoot: vfs.MustNew("/Users/lennart.kats@databricks.com"), // Use AWS implementation for testing. 
Tagging: tags.ForCloud(&sdkconfig.Config{ Host: "https://company.cloud.databricks.com", @@ -522,3 +526,32 @@ func TestPipelinesDevelopmentDisabled(t *testing.T) { assert.False(t, b.Config.Resources.Pipelines["pipeline1"].PipelineSpec.Development) } + +func TestSourceLinkedDeploymentEnabled(t *testing.T) { + b, diags := processSourceLinkedBundle(t, true) + require.NoError(t, diags.Error()) + assert.True(t, *b.Config.Presets.SourceLinkedDeployment) +} + +func TestSourceLinkedDeploymentDisabled(t *testing.T) { + b, diags := processSourceLinkedBundle(t, false) + require.NoError(t, diags.Error()) + assert.False(t, *b.Config.Presets.SourceLinkedDeployment) +} + +func processSourceLinkedBundle(t *testing.T, presetEnabled bool) (*bundle.Bundle, diag.Diagnostics) { + if runtime.GOOS == "windows" { + t.Skip("this test is not applicable on Windows because source-linked mode works only in the Databricks Workspace") + } + + b := mockBundle(config.Development) + + workspacePath := "/Workspace/lennart@company.com/" + b.SyncRootPath = workspacePath + b.Config.Presets.SourceLinkedDeployment = &presetEnabled + + ctx := dbr.MockRuntime(context.Background(), true) + m := bundle.Seq(ProcessTargetMode(), ApplyPresets()) + diags := bundle.Apply(ctx, b, m) + return b, diags +} diff --git a/bundle/config/mutator/translate_paths.go b/bundle/config/mutator/translate_paths.go index 321fa5b3..1e2484c7 100644 --- a/bundle/config/mutator/translate_paths.go +++ b/bundle/config/mutator/translate_paths.go @@ -11,6 +11,7 @@ import ( "strings" "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/libs/diag" "github.com/databricks/cli/libs/dyn" "github.com/databricks/cli/libs/notebook" @@ -103,8 +104,13 @@ func (t *translateContext) rewritePath( return fmt.Errorf("path %s is not contained in sync root path", localPath) } - // Prefix remote path with its remote root path. 
- remotePath := path.Join(t.b.Config.Workspace.FilePath, filepath.ToSlash(localRelPath)) + var workspacePath string + if config.IsExplicitlyEnabled(t.b.Config.Presets.SourceLinkedDeployment) { + workspacePath = t.b.SyncRootPath + } else { + workspacePath = t.b.Config.Workspace.FilePath + } + remotePath := path.Join(workspacePath, filepath.ToSlash(localRelPath)) // Convert local path into workspace path via specified function. interp, err := fn(*p, localPath, localRelPath, remotePath) diff --git a/bundle/config/mutator/translate_paths_test.go b/bundle/config/mutator/translate_paths_test.go index 9d655b27..a2032f81 100644 --- a/bundle/config/mutator/translate_paths_test.go +++ b/bundle/config/mutator/translate_paths_test.go @@ -4,6 +4,7 @@ import ( "context" "os" "path/filepath" + "runtime" "strings" "testing" @@ -787,3 +788,163 @@ func TestTranslatePathWithComplexVariables(t *testing.T) { b.Config.Resources.Jobs["job"].Tasks[0].Libraries[0].Whl, ) } + +func TestTranslatePathsWithSourceLinkedDeployment(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("this test is not applicable on Windows because source-linked mode works only in the Databricks Workspace") + } + + dir := t.TempDir() + touchNotebookFile(t, filepath.Join(dir, "my_job_notebook.py")) + touchNotebookFile(t, filepath.Join(dir, "my_pipeline_notebook.py")) + touchEmptyFile(t, filepath.Join(dir, "my_python_file.py")) + touchEmptyFile(t, filepath.Join(dir, "dist", "task.jar")) + touchEmptyFile(t, filepath.Join(dir, "requirements.txt")) + + enabled := true + b := &bundle.Bundle{ + SyncRootPath: dir, + SyncRoot: vfs.MustNew(dir), + Config: config.Root{ + Workspace: config.Workspace{ + FilePath: "/bundle", + }, + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "job": { + JobSettings: &jobs.JobSettings{ + Tasks: []jobs.Task{ + { + NotebookTask: &jobs.NotebookTask{ + NotebookPath: "my_job_notebook.py", + }, + Libraries: []compute.Library{ + {Whl: "./dist/task.whl"}, + }, + }, + { + 
NotebookTask: &jobs.NotebookTask{ + NotebookPath: "/Users/jane.doe@databricks.com/absolute_remote.py", + }, + }, + { + NotebookTask: &jobs.NotebookTask{ + NotebookPath: "my_job_notebook.py", + }, + Libraries: []compute.Library{ + {Requirements: "requirements.txt"}, + }, + }, + { + SparkPythonTask: &jobs.SparkPythonTask{ + PythonFile: "my_python_file.py", + }, + }, + { + SparkJarTask: &jobs.SparkJarTask{ + MainClassName: "HelloWorld", + }, + Libraries: []compute.Library{ + {Jar: "./dist/task.jar"}, + }, + }, + { + SparkJarTask: &jobs.SparkJarTask{ + MainClassName: "HelloWorldRemote", + }, + Libraries: []compute.Library{ + {Jar: "dbfs:/bundle/dist/task_remote.jar"}, + }, + }, + }, + }, + }, + }, + Pipelines: map[string]*resources.Pipeline{ + "pipeline": { + PipelineSpec: &pipelines.PipelineSpec{ + Libraries: []pipelines.PipelineLibrary{ + { + Notebook: &pipelines.NotebookLibrary{ + Path: "my_pipeline_notebook.py", + }, + }, + { + Notebook: &pipelines.NotebookLibrary{ + Path: "/Users/jane.doe@databricks.com/absolute_remote.py", + }, + }, + { + File: &pipelines.FileLibrary{ + Path: "my_python_file.py", + }, + }, + }, + }, + }, + }, + }, + Presets: config.Presets{ + SourceLinkedDeployment: &enabled, + }, + }, + } + + bundletest.SetLocation(b, ".", []dyn.Location{{File: filepath.Join(dir, "resource.yml")}}) + diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths()) + require.NoError(t, diags.Error()) + + // updated to source path + assert.Equal( + t, + filepath.Join(dir, "my_job_notebook"), + b.Config.Resources.Jobs["job"].Tasks[0].NotebookTask.NotebookPath, + ) + assert.Equal( + t, + filepath.Join(dir, "requirements.txt"), + b.Config.Resources.Jobs["job"].Tasks[2].Libraries[0].Requirements, + ) + assert.Equal( + t, + filepath.Join(dir, "my_python_file.py"), + b.Config.Resources.Jobs["job"].Tasks[3].SparkPythonTask.PythonFile, + ) + assert.Equal( + t, + filepath.Join(dir, "my_pipeline_notebook"), + 
b.Config.Resources.Pipelines["pipeline"].Libraries[0].Notebook.Path, + ) + assert.Equal( + t, + filepath.Join(dir, "my_python_file.py"), + b.Config.Resources.Pipelines["pipeline"].Libraries[2].File.Path, + ) + + // left as is + assert.Equal( + t, + filepath.Join("dist", "task.whl"), + b.Config.Resources.Jobs["job"].Tasks[0].Libraries[0].Whl, + ) + assert.Equal( + t, + "/Users/jane.doe@databricks.com/absolute_remote.py", + b.Config.Resources.Jobs["job"].Tasks[1].NotebookTask.NotebookPath, + ) + assert.Equal( + t, + filepath.Join("dist", "task.jar"), + b.Config.Resources.Jobs["job"].Tasks[4].Libraries[0].Jar, + ) + assert.Equal( + t, + "dbfs:/bundle/dist/task_remote.jar", + b.Config.Resources.Jobs["job"].Tasks[5].Libraries[0].Jar, + ) + assert.Equal( + t, + "/Users/jane.doe@databricks.com/absolute_remote.py", + b.Config.Resources.Pipelines["pipeline"].Libraries[1].Notebook.Path, + ) +} diff --git a/bundle/config/presets.go b/bundle/config/presets.go index 61009a25..30f56c0f 100644 --- a/bundle/config/presets.go +++ b/bundle/config/presets.go @@ -17,6 +17,11 @@ type Presets struct { // JobsMaxConcurrentRuns is the default value for the max concurrent runs of jobs. JobsMaxConcurrentRuns int `json:"jobs_max_concurrent_runs,omitempty"` + // SourceLinkedDeployment indicates whether source-linked deployment is enabled. It works only in the Databricks Workspace. + // When set to true, resources created during deployment will point to source files in the workspace instead of their workspace copies. + // File synchronization to ${workspace.file_path} is skipped. + SourceLinkedDeployment *bool `json:"source_linked_deployment,omitempty"` + // Tags to add to all resources. 
Tags map[string]string `json:"tags,omitempty"` } diff --git a/bundle/deploy/files/upload.go b/bundle/deploy/files/upload.go index bab4e176..452850dc 100644 --- a/bundle/deploy/files/upload.go +++ b/bundle/deploy/files/upload.go @@ -7,6 +7,7 @@ import ( "io/fs" "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/permissions" "github.com/databricks/cli/libs/cmdio" "github.com/databricks/cli/libs/diag" @@ -23,6 +24,11 @@ func (m *upload) Name() string { } func (m *upload) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + if config.IsExplicitlyEnabled(b.Config.Presets.SourceLinkedDeployment) { + cmdio.LogString(ctx, "Source-linked deployment is enabled. Deployed resources reference the source files in your working tree instead of separate copies.") + return nil + } + cmdio.LogString(ctx, fmt.Sprintf("Uploading bundle files to %s...", b.Config.Workspace.FilePath)) opts, err := GetSyncOptions(ctx, bundle.ReadOnly(b)) if err != nil { diff --git a/bundle/trampoline/python_dbr_warning.go b/bundle/trampoline/python_dbr_warning.go index f62e9eab..cf3e9aeb 100644 --- a/bundle/trampoline/python_dbr_warning.go +++ b/bundle/trampoline/python_dbr_warning.go @@ -6,6 +6,7 @@ import ( "strings" "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/libraries" "github.com/databricks/cli/libs/diag" "github.com/databricks/cli/libs/log" @@ -22,6 +23,9 @@ func WrapperWarning() bundle.Mutator { func (m *wrapperWarning) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { if isPythonWheelWrapperOn(b) { + if config.IsExplicitlyEnabled(b.Config.Presets.SourceLinkedDeployment) { + return diag.Warningf("Python wheel notebook wrapper is not available when using source-linked deployment mode. You can disable this mode by setting 'presets.source_linked_deployment: false'") + } return nil }