Always prepend bundle remote paths with /Workspace (#1724)

## Changes
Due to platform changes, all libraries, notebooks and etc. paths used in
Databricks must be started with either /Workspace or /Volumes prefix.

This PR makes sure that all bundle paths are correctly prefixed.

Note: this change is a breaking change if user previously configured and
used `/Workspace/Workspace` folder in their workspace file system or
having `/Workspace/${workspace.root_path}...` pattern configured
anywhere in their bundle config

Fixes: #1751

AI:
- [x] Scan DABs config and error out on
`/Workspace/${workspace.root_path}...` pattern usage

## Tests
Added unit tests

---------

Co-authored-by: Pieter Noordhuis <pieter.noordhuis@databricks.com>
This commit is contained in:
Andrew Nester 2024-10-02 17:34:00 +02:00 committed by GitHub
parent 80d55f4540
commit a8cff48c0b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 327 additions and 17 deletions

View File

@ -33,7 +33,7 @@ func (m *expandWorkspaceRoot) Apply(ctx context.Context, b *bundle.Bundle) diag.
}
if strings.HasPrefix(root, "~/") {
home := fmt.Sprintf("/Users/%s", currentUser.UserName)
home := fmt.Sprintf("/Workspace/Users/%s", currentUser.UserName)
b.Config.Workspace.RootPath = path.Join(home, root[2:])
}

View File

@ -27,7 +27,7 @@ func TestExpandWorkspaceRoot(t *testing.T) {
}
diags := bundle.Apply(context.Background(), b, mutator.ExpandWorkspaceRoot())
require.NoError(t, diags.Error())
assert.Equal(t, "/Users/jane@doe.com/foo", b.Config.Workspace.RootPath)
assert.Equal(t, "/Workspace/Users/jane@doe.com/foo", b.Config.Workspace.RootPath)
}
func TestExpandWorkspaceRootDoesNothing(t *testing.T) {

View File

@ -0,0 +1,67 @@
package mutator
import (
"context"
"fmt"
"strings"
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/libs/diag"
"github.com/databricks/cli/libs/dyn"
)
type prependWorkspacePrefix struct{}
// PrependWorkspacePrefix prepends the workspace root path to all paths in the bundle.
func PrependWorkspacePrefix() bundle.Mutator {
return &prependWorkspacePrefix{}
}
func (m *prependWorkspacePrefix) Name() string {
return "PrependWorkspacePrefix"
}
var skipPrefixes = []string{
"/Workspace/",
"/Volumes/",
}
func (m *prependWorkspacePrefix) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
patterns := []dyn.Pattern{
dyn.NewPattern(dyn.Key("workspace"), dyn.Key("root_path")),
dyn.NewPattern(dyn.Key("workspace"), dyn.Key("file_path")),
dyn.NewPattern(dyn.Key("workspace"), dyn.Key("artifact_path")),
dyn.NewPattern(dyn.Key("workspace"), dyn.Key("state_path")),
}
err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) {
var err error
for _, pattern := range patterns {
v, err = dyn.MapByPattern(v, pattern, func(p dyn.Path, pv dyn.Value) (dyn.Value, error) {
path, ok := pv.AsString()
if !ok {
return dyn.InvalidValue, fmt.Errorf("expected string, got %s", v.Kind())
}
for _, prefix := range skipPrefixes {
if strings.HasPrefix(path, prefix) {
return pv, nil
}
}
return dyn.NewValue(fmt.Sprintf("/Workspace%s", path), v.Locations()), nil
})
if err != nil {
return dyn.InvalidValue, err
}
}
return v, nil
})
if err != nil {
return diag.FromErr(err)
}
return nil
}

View File

@ -0,0 +1,79 @@
package mutator
import (
"context"
"testing"
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config"
"github.com/databricks/databricks-sdk-go/service/iam"
"github.com/stretchr/testify/require"
)
func TestPrependWorkspacePrefix(t *testing.T) {
testCases := []struct {
path string
expected string
}{
{
path: "/Users/test",
expected: "/Workspace/Users/test",
},
{
path: "/Shared/test",
expected: "/Workspace/Shared/test",
},
{
path: "/Workspace/Users/test",
expected: "/Workspace/Users/test",
},
{
path: "/Volumes/Users/test",
expected: "/Volumes/Users/test",
},
}
for _, tc := range testCases {
b := &bundle.Bundle{
Config: config.Root{
Workspace: config.Workspace{
RootPath: tc.path,
ArtifactPath: tc.path,
FilePath: tc.path,
StatePath: tc.path,
},
},
}
diags := bundle.Apply(context.Background(), b, PrependWorkspacePrefix())
require.Empty(t, diags)
require.Equal(t, tc.expected, b.Config.Workspace.RootPath)
require.Equal(t, tc.expected, b.Config.Workspace.ArtifactPath)
require.Equal(t, tc.expected, b.Config.Workspace.FilePath)
require.Equal(t, tc.expected, b.Config.Workspace.StatePath)
}
}
func TestPrependWorkspaceForDefaultConfig(t *testing.T) {
b := &bundle.Bundle{
Config: config.Root{
Bundle: config.Bundle{
Name: "test",
Target: "dev",
},
Workspace: config.Workspace{
CurrentUser: &config.User{
User: &iam.User{
UserName: "jane@doe.com",
},
},
},
},
}
diags := bundle.Apply(context.Background(), b, bundle.Seq(DefineDefaultWorkspaceRoot(), ExpandWorkspaceRoot(), DefineDefaultWorkspacePaths(), PrependWorkspacePrefix()))
require.Empty(t, diags)
require.Equal(t, "/Workspace/Users/jane@doe.com/.bundle/test/dev", b.Config.Workspace.RootPath)
require.Equal(t, "/Workspace/Users/jane@doe.com/.bundle/test/dev/artifacts", b.Config.Workspace.ArtifactPath)
require.Equal(t, "/Workspace/Users/jane@doe.com/.bundle/test/dev/files", b.Config.Workspace.FilePath)
require.Equal(t, "/Workspace/Users/jane@doe.com/.bundle/test/dev/state", b.Config.Workspace.StatePath)
}

View File

@ -0,0 +1,72 @@
package mutator
import (
"context"
"fmt"
"strings"
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/libs/diag"
"github.com/databricks/cli/libs/dyn"
)
type rewriteWorkspacePrefix struct{}
// RewriteWorkspacePrefix finds any strings in bundle configration that have
// workspace prefix plus workspace path variable used and removes workspace prefix from it.
func RewriteWorkspacePrefix() bundle.Mutator {
return &rewriteWorkspacePrefix{}
}
func (m *rewriteWorkspacePrefix) Name() string {
return "RewriteWorkspacePrefix"
}
func (m *rewriteWorkspacePrefix) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
diags := diag.Diagnostics{}
paths := map[string]string{
"/Workspace/${workspace.root_path}": "${workspace.root_path}",
"/Workspace${workspace.root_path}": "${workspace.root_path}",
"/Workspace/${workspace.file_path}": "${workspace.file_path}",
"/Workspace${workspace.file_path}": "${workspace.file_path}",
"/Workspace/${workspace.artifact_path}": "${workspace.artifact_path}",
"/Workspace${workspace.artifact_path}": "${workspace.artifact_path}",
"/Workspace/${workspace.state_path}": "${workspace.state_path}",
"/Workspace${workspace.state_path}": "${workspace.state_path}",
}
err := b.Config.Mutate(func(root dyn.Value) (dyn.Value, error) {
// Walk through the bundle configuration, check all the string leafs and
// see if any of the prefixes are used in the remote path.
return dyn.Walk(root, func(p dyn.Path, v dyn.Value) (dyn.Value, error) {
vv, ok := v.AsString()
if !ok {
return v, nil
}
for path, replacePath := range paths {
if strings.Contains(vv, path) {
newPath := strings.Replace(vv, path, replacePath, 1)
diags = append(diags, diag.Diagnostic{
Severity: diag.Warning,
Summary: fmt.Sprintf("substring %q found in %q. Please update this to %q.", path, vv, newPath),
Detail: "For more information, please refer to: https://docs.databricks.com/en/release-notes/dev-tools/bundles.html#workspace-paths",
Locations: v.Locations(),
Paths: []dyn.Path{p},
})
// Remove the workspace prefix from the string.
return dyn.NewValue(newPath, v.Locations()), nil
}
}
return v, nil
})
})
if err != nil {
return diag.FromErr(err)
}
return diags
}

View File

@ -0,0 +1,85 @@
package mutator
import (
"context"
"testing"
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config"
"github.com/databricks/cli/bundle/config/resources"
"github.com/databricks/cli/libs/diag"
"github.com/databricks/databricks-sdk-go/service/compute"
"github.com/databricks/databricks-sdk-go/service/jobs"
"github.com/stretchr/testify/require"
)
func TestNoWorkspacePrefixUsed(t *testing.T) {
b := &bundle.Bundle{
Config: config.Root{
Workspace: config.Workspace{
RootPath: "/Workspace/Users/test",
ArtifactPath: "/Workspace/Users/test/artifacts",
FilePath: "/Workspace/Users/test/files",
StatePath: "/Workspace/Users/test/state",
},
Resources: config.Resources{
Jobs: map[string]*resources.Job{
"test_job": {
JobSettings: &jobs.JobSettings{
Tasks: []jobs.Task{
{
SparkPythonTask: &jobs.SparkPythonTask{
PythonFile: "/Workspace/${workspace.root_path}/file1.py",
},
},
{
NotebookTask: &jobs.NotebookTask{
NotebookPath: "/Workspace${workspace.file_path}/notebook1",
},
Libraries: []compute.Library{
{
Jar: "/Workspace/${workspace.artifact_path}/jar1.jar",
},
},
},
{
NotebookTask: &jobs.NotebookTask{
NotebookPath: "${workspace.file_path}/notebook2",
},
Libraries: []compute.Library{
{
Jar: "${workspace.artifact_path}/jar2.jar",
},
},
},
},
},
},
},
},
},
}
diags := bundle.Apply(context.Background(), b, RewriteWorkspacePrefix())
require.Len(t, diags, 3)
expectedErrors := map[string]bool{
`substring "/Workspace/${workspace.root_path}" found in "/Workspace/${workspace.root_path}/file1.py". Please update this to "${workspace.root_path}/file1.py".`: true,
`substring "/Workspace${workspace.file_path}" found in "/Workspace${workspace.file_path}/notebook1". Please update this to "${workspace.file_path}/notebook1".`: true,
`substring "/Workspace/${workspace.artifact_path}" found in "/Workspace/${workspace.artifact_path}/jar1.jar". Please update this to "${workspace.artifact_path}/jar1.jar".`: true,
}
for _, d := range diags {
require.Equal(t, d.Severity, diag.Warning)
require.Contains(t, expectedErrors, d.Summary)
delete(expectedErrors, d.Summary)
}
require.Equal(t, "${workspace.root_path}/file1.py", b.Config.Resources.Jobs["test_job"].JobSettings.Tasks[0].SparkPythonTask.PythonFile)
require.Equal(t, "${workspace.file_path}/notebook1", b.Config.Resources.Jobs["test_job"].JobSettings.Tasks[1].NotebookTask.NotebookPath)
require.Equal(t, "${workspace.artifact_path}/jar1.jar", b.Config.Resources.Jobs["test_job"].JobSettings.Tasks[1].Libraries[0].Jar)
require.Equal(t, "${workspace.file_path}/notebook2", b.Config.Resources.Jobs["test_job"].JobSettings.Tasks[2].NotebookTask.NotebookPath)
require.Equal(t, "${workspace.artifact_path}/jar2.jar", b.Config.Resources.Jobs["test_job"].JobSettings.Tasks[2].Libraries[0].Jar)
}

View File

@ -47,7 +47,7 @@ type Workspace struct {
// Remote workspace base path for deployment state, for artifacts, as synchronization target.
// This defaults to "~/.bundle/${bundle.name}/${bundle.target}" where "~" expands to
// the current user's home directory in the workspace (e.g. `/Users/jane@doe.com`).
// the current user's home directory in the workspace (e.g. `/Workspace/Users/jane@doe.com`).
RootPath string `json:"root_path,omitempty"`
// Remote workspace path to synchronize local files to.

View File

@ -39,9 +39,16 @@ func Initialize() bundle.Mutator {
mutator.MergePipelineClusters(),
mutator.InitializeWorkspaceClient(),
mutator.PopulateCurrentUser(),
mutator.DefineDefaultWorkspaceRoot(),
mutator.ExpandWorkspaceRoot(),
mutator.DefineDefaultWorkspacePaths(),
mutator.PrependWorkspacePrefix(),
// This mutator needs to be run before variable interpolation because it
// searches for strings with variable references in them.
mutator.RewriteWorkspacePrefix(),
mutator.SetVariables(),
// Intentionally placed before ResolveVariableReferencesInLookup, ResolveResourceReferences,
// ResolveVariableReferencesInComplexVariables and ResolveVariableReferences.

View File

@ -11,7 +11,7 @@ func TestExpandPipelineGlobPaths(t *testing.T) {
require.NoError(t, diags.Error())
require.Equal(
t,
"/Users/user@domain.com/.bundle/pipeline_glob_paths/default/files/dlt/nyc_taxi_loader",
"/Workspace/Users/user@domain.com/.bundle/pipeline_glob_paths/default/files/dlt/nyc_taxi_loader",
b.Config.Resources.Pipelines["nyc_taxi_pipeline"].Libraries[0].Notebook.Path,
)
}

View File

@ -12,9 +12,9 @@ func TestRelativePathTranslationDefault(t *testing.T) {
require.NoError(t, diags.Error())
t0 := b.Config.Resources.Jobs["job"].Tasks[0]
assert.Equal(t, "/remote/src/file1.py", t0.SparkPythonTask.PythonFile)
assert.Equal(t, "/Workspace/remote/src/file1.py", t0.SparkPythonTask.PythonFile)
t1 := b.Config.Resources.Jobs["job"].Tasks[1]
assert.Equal(t, "/remote/src/file1.py", t1.SparkPythonTask.PythonFile)
assert.Equal(t, "/Workspace/remote/src/file1.py", t1.SparkPythonTask.PythonFile)
}
func TestRelativePathTranslationOverride(t *testing.T) {
@ -22,7 +22,7 @@ func TestRelativePathTranslationOverride(t *testing.T) {
require.NoError(t, diags.Error())
t0 := b.Config.Resources.Jobs["job"].Tasks[0]
assert.Equal(t, "/remote/src/file2.py", t0.SparkPythonTask.PythonFile)
assert.Equal(t, "/Workspace/remote/src/file2.py", t0.SparkPythonTask.PythonFile)
t1 := b.Config.Resources.Jobs["job"].Tasks[1]
assert.Equal(t, "/remote/src/file2.py", t1.SparkPythonTask.PythonFile)
assert.Equal(t, "/Workspace/remote/src/file2.py", t1.SparkPythonTask.PythonFile)
}

View File

@ -236,7 +236,7 @@ func TestAccDeployBasicBundleLogs(t *testing.T) {
stdout, stderr := blackBoxRun(t, root, "bundle", "deploy")
assert.Equal(t, strings.Join([]string{
fmt.Sprintf("Uploading bundle files to /Users/%s/.bundle/%s/files...", currentUser.UserName, uniqueId),
fmt.Sprintf("Uploading bundle files to /Workspace/Users/%s/.bundle/%s/files...", currentUser.UserName, uniqueId),
"Deploying resources...",
"Updating deployment state...",
"Deployment complete!\n",

View File

@ -114,7 +114,7 @@ func getBundleRemoteRootPath(w *databricks.WorkspaceClient, t *testing.T, unique
// Compute root path for the bundle deployment
me, err := w.CurrentUser.Me(context.Background())
require.NoError(t, err)
root := fmt.Sprintf("/Users/%s/.bundle/%s", me.UserName, uniqueId)
root := fmt.Sprintf("/Workspace/Users/%s/.bundle/%s", me.UserName, uniqueId)
return root
}

View File

@ -24,8 +24,8 @@ targets:
mode: production
workspace:
host: {{workspace_host}}
# We explicitly specify /Users/{{user_name}} to make sure we only have a single copy.
root_path: /Users/{{user_name}}/.bundle/${bundle.name}/${bundle.target}
# We explicitly specify /Workspace/Users/{{user_name}} to make sure we only have a single copy.
root_path: /Workspace/Users/{{user_name}}/.bundle/${bundle.name}/${bundle.target}
permissions:
- {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}}
level: CAN_MANAGE

View File

@ -21,8 +21,8 @@ targets:
mode: production
workspace:
host: {{workspace_host}}
# We explicitly specify /Users/{{user_name}} to make sure we only have a single copy.
root_path: /Users/{{user_name}}/.bundle/${bundle.name}/${bundle.target}
# We explicitly specify /Workspace/Users/{{user_name}} to make sure we only have a single copy.
root_path: /Workspace/Users/{{user_name}}/.bundle/${bundle.name}/${bundle.target}
permissions:
- {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}}
level: CAN_MANAGE

View File

@ -15,4 +15,4 @@ resources:
path: ../src/dlt_pipeline.ipynb
configuration:
bundle.sourcePath: /Workspace/${workspace.file_path}/src
bundle.sourcePath: ${workspace.file_path}/src

View File

@ -41,8 +41,8 @@ targets:
mode: production
workspace:
host: {{workspace_host}}
# We explicitly specify /Users/{{user_name}} to make sure we only have a single copy.
root_path: /Users/{{user_name}}/.bundle/${bundle.name}/${bundle.target}
# We explicitly specify /Workspace/Users/{{user_name}} to make sure we only have a single copy.
root_path: /Workspace/Users/{{user_name}}/.bundle/${bundle.name}/${bundle.target}
variables:
warehouse_id: {{index ((regexp "[^/]+$").FindStringSubmatch .http_path) 0}}
catalog: {{.default_catalog}}