Source-linked deployments for bundles in the workspace (#1884)

## Changes

This change adds a preset for source-linked deployments. It is enabled
by default for targets in `development` mode **if** the Databricks CLI
is running from the `/Workspace` directory on DBR. It does not have an
effect when running the CLI anywhere else.

Key highlights:
1. Files in this mode won't be uploaded to the workspace
2. Created resources will use references to source files instead of
their workspace copies

## Tests
1. Apply preset unit test covering conditional logic
2. High-level process target mode unit test for testing integration
between mutators

---------

Co-authored-by: Pieter Noordhuis <pieter.noordhuis@databricks.com>
This commit is contained in:
Ilya Kuznetsov 2024-11-20 13:22:27 +01:00 committed by GitHub
parent 886e14910c
commit 756e55fabc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 321 additions and 2 deletions

View File

@ -9,6 +9,7 @@ import (
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config"
"github.com/databricks/cli/libs/dbr"
"github.com/databricks/cli/libs/diag"
"github.com/databricks/cli/libs/dyn"
"github.com/databricks/cli/libs/textutil"
@ -221,6 +222,15 @@ func (m *applyPresets) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnos
dashboard.DisplayName = prefix + dashboard.DisplayName
}
if config.IsExplicitlyEnabled((b.Config.Presets.SourceLinkedDeployment)) {
isDatabricksWorkspace := dbr.RunsOnRuntime(ctx) && strings.HasPrefix(b.SyncRootPath, "/Workspace/")
if !isDatabricksWorkspace {
disabled := false
b.Config.Presets.SourceLinkedDeployment = &disabled
diags = diags.Extend(diag.Warningf("source-linked deployment is available only in the Databricks Workspace"))
}
}
return diags
}

View File

@ -2,12 +2,14 @@ package mutator_test
import (
"context"
"runtime"
"testing"
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config"
"github.com/databricks/cli/bundle/config/mutator"
"github.com/databricks/cli/bundle/config/resources"
"github.com/databricks/cli/libs/dbr"
"github.com/databricks/databricks-sdk-go/service/catalog"
"github.com/databricks/databricks-sdk-go/service/jobs"
"github.com/stretchr/testify/require"
@ -364,3 +366,86 @@ func TestApplyPresetsResourceNotDefined(t *testing.T) {
})
}
}
// TestApplyPresetsSourceLinkedDeployment checks how ApplyPresets treats the
// source-linked deployment preset for different combinations of sync root
// path, mocked Databricks runtime detection, and initial preset value.
//
// When the preset is explicitly enabled but the bundle is not under
// "/Workspace/" or the runtime is not detected, the preset is expected to be
// reset to false and a warning emitted. In all other cases the value is
// expected to be left untouched.
func TestApplyPresetsSourceLinkedDeployment(t *testing.T) {
	if runtime.GOOS == "windows" {
		t.Skip("this test is not applicable on Windows because source-linked mode works only in the Databricks Workspace")
	}

	testContext := context.Background()
	enabled := true
	disabled := false
	workspacePath := "/Workspace/user.name@company.com"

	tests := []struct {
		bundlePath      string
		ctx             context.Context
		name            string
		initialValue    *bool
		expectedValue   *bool
		expectedWarning string
	}{
		{
			name:          "preset enabled, bundle in Workspace, databricks runtime",
			bundlePath:    workspacePath,
			ctx:           dbr.MockRuntime(testContext, true),
			initialValue:  &enabled,
			expectedValue: &enabled,
		},
		{
			name:            "preset enabled, bundle not in Workspace, databricks runtime",
			bundlePath:      "/Users/user.name@company.com",
			ctx:             dbr.MockRuntime(testContext, true),
			initialValue:    &enabled,
			expectedValue:   &disabled,
			expectedWarning: "source-linked deployment is available only in the Databricks Workspace",
		},
		{
			name:            "preset enabled, bundle in Workspace, not databricks runtime",
			bundlePath:      workspacePath,
			ctx:             dbr.MockRuntime(testContext, false),
			initialValue:    &enabled,
			expectedValue:   &disabled,
			expectedWarning: "source-linked deployment is available only in the Databricks Workspace",
		},
		{
			name:          "preset disabled, bundle in Workspace, databricks runtime",
			bundlePath:    workspacePath,
			ctx:           dbr.MockRuntime(testContext, true),
			initialValue:  &disabled,
			expectedValue: &disabled,
		},
		{
			name:          "preset nil, bundle in Workspace, databricks runtime",
			bundlePath:    workspacePath,
			ctx:           dbr.MockRuntime(testContext, true),
			initialValue:  nil,
			expectedValue: nil,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			b := &bundle.Bundle{
				SyncRootPath: tt.bundlePath,
				Config: config.Root{
					Presets: config.Presets{
						SourceLinkedDeployment: tt.initialValue,
					},
				},
			}

			diags := bundle.Apply(tt.ctx, b, mutator.ApplyPresets())
			if diags.HasError() {
				t.Fatalf("unexpected error: %v", diags)
			}

			if tt.expectedWarning != "" {
				// Guard the index below: if the warning is missing, fail this
				// subtest cleanly instead of panicking on an empty slice.
				require.NotEmpty(t, diags)
				require.Equal(t, tt.expectedWarning, diags[0].Summary)
			}

			require.Equal(t, tt.expectedValue, b.Config.Presets.SourceLinkedDeployment)
		})
	}
}

View File

@ -6,6 +6,7 @@ import (
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config"
"github.com/databricks/cli/libs/dbr"
"github.com/databricks/cli/libs/diag"
"github.com/databricks/cli/libs/dyn"
"github.com/databricks/cli/libs/iamutil"
@ -57,6 +58,14 @@ func transformDevelopmentMode(ctx context.Context, b *bundle.Bundle) {
t.TriggerPauseStatus = config.Paused
}
if !config.IsExplicitlyDisabled(t.SourceLinkedDeployment) {
isInWorkspace := strings.HasPrefix(b.SyncRootPath, "/Workspace/")
if isInWorkspace && dbr.RunsOnRuntime(ctx) {
enabled := true
t.SourceLinkedDeployment = &enabled
}
}
if !config.IsExplicitlyDisabled(t.PipelinesDevelopment) {
enabled := true
t.PipelinesDevelopment = &enabled

View File

@ -3,14 +3,17 @@ package mutator
import (
"context"
"reflect"
"runtime"
"strings"
"testing"
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config"
"github.com/databricks/cli/bundle/config/resources"
"github.com/databricks/cli/libs/dbr"
"github.com/databricks/cli/libs/diag"
"github.com/databricks/cli/libs/tags"
"github.com/databricks/cli/libs/vfs"
sdkconfig "github.com/databricks/databricks-sdk-go/config"
"github.com/databricks/databricks-sdk-go/service/catalog"
"github.com/databricks/databricks-sdk-go/service/compute"
@ -140,6 +143,7 @@ func mockBundle(mode config.Mode) *bundle.Bundle {
},
},
},
SyncRoot: vfs.MustNew("/Users/lennart.kats@databricks.com"),
// Use AWS implementation for testing.
Tagging: tags.ForCloud(&sdkconfig.Config{
Host: "https://company.cloud.databricks.com",
@ -522,3 +526,32 @@ func TestPipelinesDevelopmentDisabled(t *testing.T) {
assert.False(t, b.Config.Resources.Pipelines["pipeline1"].PipelineSpec.Development)
}
// TestSourceLinkedDeploymentEnabled verifies that a preset explicitly set to
// true is still true after ProcessTargetMode and ApplyPresets have run on a
// development-mode bundle located under /Workspace/ with the runtime mocked.
func TestSourceLinkedDeploymentEnabled(t *testing.T) {
	b, diags := processSourceLinkedBundle(t, true)
	require.NoError(t, diags.Error())

	// Fail cleanly rather than panic if a mutator ever clears the preset.
	require.NotNil(t, b.Config.Presets.SourceLinkedDeployment)
	assert.True(t, *b.Config.Presets.SourceLinkedDeployment)
}
// TestSourceLinkedDeploymentDisabled verifies that a preset explicitly set to
// false is not switched on by ProcessTargetMode and ApplyPresets, even for a
// development-mode bundle located under /Workspace/ with the runtime mocked.
func TestSourceLinkedDeploymentDisabled(t *testing.T) {
	b, diags := processSourceLinkedBundle(t, false)
	require.NoError(t, diags.Error())

	// Fail cleanly rather than panic if a mutator ever clears the preset.
	require.NotNil(t, b.Config.Presets.SourceLinkedDeployment)
	assert.False(t, *b.Config.Presets.SourceLinkedDeployment)
}
// processSourceLinkedBundle builds a development-mode mock bundle whose sync
// root path is under /Workspace/, sets the source-linked deployment preset to
// presetEnabled, and applies ProcessTargetMode followed by ApplyPresets with
// the Databricks runtime mocked as present.
//
// It returns the mutated bundle together with the diagnostics produced by the
// mutators. The calling test is skipped on Windows.
func processSourceLinkedBundle(t *testing.T, presetEnabled bool) (*bundle.Bundle, diag.Diagnostics) {
	// Attribute skips and failures to the calling test, not to this helper.
	t.Helper()

	if runtime.GOOS == "windows" {
		t.Skip("this test is not applicable on Windows because source-linked mode works only in the Databricks Workspace")
	}

	b := mockBundle(config.Development)

	workspacePath := "/Workspace/lennart@company.com/"
	b.SyncRootPath = workspacePath
	b.Config.Presets.SourceLinkedDeployment = &presetEnabled

	ctx := dbr.MockRuntime(context.Background(), true)
	m := bundle.Seq(ProcessTargetMode(), ApplyPresets())
	diags := bundle.Apply(ctx, b, m)
	return b, diags
}

View File

@ -11,6 +11,7 @@ import (
"strings"
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config"
"github.com/databricks/cli/libs/diag"
"github.com/databricks/cli/libs/dyn"
"github.com/databricks/cli/libs/notebook"
@ -103,8 +104,13 @@ func (t *translateContext) rewritePath(
return fmt.Errorf("path %s is not contained in sync root path", localPath)
}
// Prefix remote path with its remote root path.
remotePath := path.Join(t.b.Config.Workspace.FilePath, filepath.ToSlash(localRelPath))
var workspacePath string
if config.IsExplicitlyEnabled(t.b.Config.Presets.SourceLinkedDeployment) {
workspacePath = t.b.SyncRootPath
} else {
workspacePath = t.b.Config.Workspace.FilePath
}
remotePath := path.Join(workspacePath, filepath.ToSlash(localRelPath))
// Convert local path into workspace path via specified function.
interp, err := fn(*p, localPath, localRelPath, remotePath)

View File

@ -4,6 +4,7 @@ import (
"context"
"os"
"path/filepath"
"runtime"
"strings"
"testing"
@ -787,3 +788,163 @@ func TestTranslatePathWithComplexVariables(t *testing.T) {
b.Config.Resources.Jobs["job"].Tasks[0].Libraries[0].Whl,
)
}
// TestTranslatePathsWithSourceLinkedDeployment runs TranslatePaths on a bundle
// that has the source-linked deployment preset enabled.
//
// Relative notebook, Python file and requirements references are expected to
// resolve under the sync root (the temporary directory) rather than under
// Workspace.FilePath ("/bundle"). Wheel/JAR libraries and absolute or dbfs
// paths are expected not to be rewritten to the sync root.
func TestTranslatePathsWithSourceLinkedDeployment(t *testing.T) {
	if runtime.GOOS == "windows" {
		t.Skip("this test is not applicable on Windows because source-linked mode works only in the Databricks Workspace")
	}

	root := t.TempDir()
	// inRoot joins the given path elements onto the temporary sync root.
	inRoot := func(elem ...string) string {
		return filepath.Join(append([]string{root}, elem...)...)
	}

	// Fixture files referenced by the job and pipeline definitions below.
	touchNotebookFile(t, inRoot("my_job_notebook.py"))
	touchNotebookFile(t, inRoot("my_pipeline_notebook.py"))
	touchEmptyFile(t, inRoot("my_python_file.py"))
	touchEmptyFile(t, inRoot("dist", "task.jar"))
	touchEmptyFile(t, inRoot("requirements.txt"))

	const (
		remoteNotebook = "/Users/jane.doe@databricks.com/absolute_remote.py"
		remoteJar      = "dbfs:/bundle/dist/task_remote.jar"
	)

	jobTasks := []jobs.Task{
		{
			NotebookTask: &jobs.NotebookTask{NotebookPath: "my_job_notebook.py"},
			Libraries:    []compute.Library{{Whl: "./dist/task.whl"}},
		},
		{
			NotebookTask: &jobs.NotebookTask{NotebookPath: remoteNotebook},
		},
		{
			NotebookTask: &jobs.NotebookTask{NotebookPath: "my_job_notebook.py"},
			Libraries:    []compute.Library{{Requirements: "requirements.txt"}},
		},
		{
			SparkPythonTask: &jobs.SparkPythonTask{PythonFile: "my_python_file.py"},
		},
		{
			SparkJarTask: &jobs.SparkJarTask{MainClassName: "HelloWorld"},
			Libraries:    []compute.Library{{Jar: "./dist/task.jar"}},
		},
		{
			SparkJarTask: &jobs.SparkJarTask{MainClassName: "HelloWorldRemote"},
			Libraries:    []compute.Library{{Jar: remoteJar}},
		},
	}

	pipelineLibraries := []pipelines.PipelineLibrary{
		{Notebook: &pipelines.NotebookLibrary{Path: "my_pipeline_notebook.py"}},
		{Notebook: &pipelines.NotebookLibrary{Path: remoteNotebook}},
		{File: &pipelines.FileLibrary{Path: "my_python_file.py"}},
	}

	sourceLinked := true
	b := &bundle.Bundle{
		SyncRootPath: root,
		SyncRoot:     vfs.MustNew(root),
		Config: config.Root{
			Workspace: config.Workspace{
				FilePath: "/bundle",
			},
			Resources: config.Resources{
				Jobs: map[string]*resources.Job{
					"job": {
						JobSettings: &jobs.JobSettings{Tasks: jobTasks},
					},
				},
				Pipelines: map[string]*resources.Pipeline{
					"pipeline": {
						PipelineSpec: &pipelines.PipelineSpec{Libraries: pipelineLibraries},
					},
				},
			},
			Presets: config.Presets{
				SourceLinkedDeployment: &sourceLinked,
			},
		},
	}

	bundletest.SetLocation(b, ".", []dyn.Location{{File: inRoot("resource.yml")}})

	diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths())
	require.NoError(t, diags.Error())

	// Read results back from the bundle; the mutator's output is what is
	// under test, not the fixture slices built above.
	gotTasks := b.Config.Resources.Jobs["job"].Tasks
	gotLibraries := b.Config.Resources.Pipelines["pipeline"].Libraries

	checks := []struct {
		want string
		got  string
	}{
		// Rewritten to point at the source location under the sync root.
		{inRoot("my_job_notebook"), gotTasks[0].NotebookTask.NotebookPath},
		{inRoot("requirements.txt"), gotTasks[2].Libraries[0].Requirements},
		{inRoot("my_python_file.py"), gotTasks[3].SparkPythonTask.PythonFile},
		{inRoot("my_pipeline_notebook"), gotLibraries[0].Notebook.Path},
		{inRoot("my_python_file.py"), gotLibraries[2].File.Path},

		// Not rewritten to the sync root.
		{filepath.Join("dist", "task.whl"), gotTasks[0].Libraries[0].Whl},
		{remoteNotebook, gotTasks[1].NotebookTask.NotebookPath},
		{filepath.Join("dist", "task.jar"), gotTasks[4].Libraries[0].Jar},
		{remoteJar, gotTasks[5].Libraries[0].Jar},
		{remoteNotebook, gotLibraries[1].Notebook.Path},
	}
	for _, c := range checks {
		assert.Equal(t, c.want, c.got)
	}
}

View File

@ -17,6 +17,11 @@ type Presets struct {
// JobsMaxConcurrentRuns is the default value for the max concurrent runs of jobs.
JobsMaxConcurrentRuns int `json:"jobs_max_concurrent_runs,omitempty"`
// SourceLinkedDeployment indicates whether source-linked deployment is enabled. Works only in the Databricks Workspace.
// When set to true, resources created during deployment will point to source files in the workspace instead of their workspace copies.
// File synchronization to ${workspace.file_path} is skipped.
SourceLinkedDeployment *bool `json:"source_linked_deployment,omitempty"`
// Tags to add to all resources.
Tags map[string]string `json:"tags,omitempty"`
}

View File

@ -7,6 +7,7 @@ import (
"io/fs"
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config"
"github.com/databricks/cli/bundle/permissions"
"github.com/databricks/cli/libs/cmdio"
"github.com/databricks/cli/libs/diag"
@ -23,6 +24,11 @@ func (m *upload) Name() string {
}
func (m *upload) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
if config.IsExplicitlyEnabled(b.Config.Presets.SourceLinkedDeployment) {
cmdio.LogString(ctx, "Source-linked deployment is enabled. Deployed resources reference the source files in your working tree instead of separate copies.")
return nil
}
cmdio.LogString(ctx, fmt.Sprintf("Uploading bundle files to %s...", b.Config.Workspace.FilePath))
opts, err := GetSyncOptions(ctx, bundle.ReadOnly(b))
if err != nil {

View File

@ -6,6 +6,7 @@ import (
"strings"
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config"
"github.com/databricks/cli/bundle/libraries"
"github.com/databricks/cli/libs/diag"
"github.com/databricks/cli/libs/log"
@ -22,6 +23,9 @@ func WrapperWarning() bundle.Mutator {
func (m *wrapperWarning) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
if isPythonWheelWrapperOn(b) {
if config.IsExplicitlyEnabled(b.Config.Presets.SourceLinkedDeployment) {
return diag.Warningf("Python wheel notebook wrapper is not available when using source-linked deployment mode. You can disable this mode by setting 'presets.source_linked_deployment: false'")
}
return nil
}