diff --git a/bundle/config/mutator/translate_paths.go b/bundle/config/mutator/translate_paths.go index 5e016d8a1..f7b6547e8 100644 --- a/bundle/config/mutator/translate_paths.go +++ b/bundle/config/mutator/translate_paths.go @@ -14,6 +14,7 @@ import ( "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/libs/diag" "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/log" "github.com/databricks/cli/libs/notebook" ) @@ -58,6 +59,47 @@ type translateContext struct { seen map[string]string } +// GetLocalPath returns the local file system paths for a path referenced from a resource.. +// If it's an absolute path, we treat it as a workspace path and return "". +// +// Arguments: +// +// sourceDir - the source directory for the resource definition. +// p - the path referenced from the resource definition. +// +// Returns: +// +// localPath - the full local file system path. +// localRelPath - the relative path from the base directory. +func GetLocalPath(ctx context.Context, b *bundle.Bundle, sourceDir string, p string) (string, string, error) { + if p == "" { + return "", "", fmt.Errorf("path cannot be empty") + } + if path.IsAbs(p) { + return "", "", nil + } + + url, err := url.Parse(p) + if err != nil { + // Apparently this path is not a URL; this can happen for paths that + // have non-URL characters like './profit-%.csv'. + log.Warnf(ctx, "Failed to parse path as a URL '%s': %v", p, err) + } else if url.Scheme != "" { + return "", "", nil + } + + localPath := filepath.Join(sourceDir, filepath.FromSlash(p)) + localRelPath, err := filepath.Rel(b.SyncRootPath, localPath) + if err != nil { + return "", "", err + } + if strings.HasPrefix(localRelPath, "..") { + return "", "", fmt.Errorf("path '%s' is not contained in sync root path", p) + } + + return localPath, localRelPath, nil +} + // rewritePath converts a given relative path from the loaded config to a new path based on the passed rewriting function // // It takes these arguments: @@ -68,42 +110,25 @@ type translateContext struct { // // The function returns an error if it is impossible to rewrite the given relative path. func (t *translateContext) rewritePath( + ctx context.Context, dir string, p *string, fn rewriteFunc, ) error { - // We assume absolute paths point to a location in the workspace - if path.IsAbs(*p) { - return nil - } - - url, err := url.Parse(*p) + localPath, localRelPath, err := GetLocalPath(ctx, t.b, dir, *p) if err != nil { return err } - - // If the file path has scheme, it's a full path and we don't need to transform it - if url.Scheme != "" { + if localPath == "" { + // Skip absolute paths return nil } - // Local path is relative to the directory the resource was defined in. - localPath := filepath.Join(dir, filepath.FromSlash(*p)) if interp, ok := t.seen[localPath]; ok { *p = interp return nil } - // Local path must be contained in the sync root. - // If it isn't, it won't be synchronized into the workspace. - localRelPath, err := filepath.Rel(t.b.SyncRootPath, localPath) - if err != nil { - return err - } - if strings.HasPrefix(localRelPath, "..") { - return fmt.Errorf("path %s is not contained in sync root path", localPath) - } - var workspacePath string if config.IsExplicitlyEnabled(t.b.Config.Presets.SourceLinkedDeployment) { workspacePath = t.b.SyncRootPath @@ -215,9 +240,9 @@ func (t *translateContext) translateNoOpWithPrefix(literal, localFullPath, local return localRelPath, nil } -func (t *translateContext) rewriteValue(p dyn.Path, v dyn.Value, fn rewriteFunc, dir string) (dyn.Value, error) { +func (t *translateContext) rewriteValue(ctx context.Context, p dyn.Path, v dyn.Value, fn rewriteFunc, dir string) (dyn.Value, error) { out := v.MustString() - err := t.rewritePath(dir, &out, fn) + err := t.rewritePath(ctx, dir, &out, fn) if err != nil { if target := (&ErrIsNotebook{}); errors.As(err, target) { return dyn.InvalidValue, fmt.Errorf(`expected a file for "%s" but got a notebook: %w`, p, target) @@ -231,15 +256,15 @@ func (t *translateContext) rewriteValue(p dyn.Path, v dyn.Value, fn rewriteFunc, return dyn.NewValue(out, v.Locations()), nil } -func (t *translateContext) rewriteRelativeTo(p dyn.Path, v dyn.Value, fn rewriteFunc, dir, fallback string) (dyn.Value, error) { - nv, err := t.rewriteValue(p, v, fn, dir) +func (t *translateContext) rewriteRelativeTo(ctx context.Context, p dyn.Path, v dyn.Value, fn rewriteFunc, dir, fallback string) (dyn.Value, error) { + nv, err := t.rewriteValue(ctx, p, v, fn, dir) if err == nil { return nv, nil } // If we failed to rewrite the path, try to rewrite it relative to the fallback directory. if fallback != "" { - nv, nerr := t.rewriteValue(p, v, fn, fallback) + nv, nerr := t.rewriteValue(ctx, p, v, fn, fallback) if nerr == nil { // TODO: Emit a warning that this path should be rewritten. return nv, nil @@ -249,7 +274,7 @@ func (t *translateContext) rewriteRelativeTo(p dyn.Path, v dyn.Value, fn rewrite return dyn.InvalidValue, err } -func (m *translatePaths) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnostics { +func (m *translatePaths) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { t := &translateContext{ b: b, seen: make(map[string]string), @@ -257,13 +282,13 @@ func (m *translatePaths) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnos err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { var err error - for _, fn := range []func(dyn.Value) (dyn.Value, error){ + for _, fn := range []func(ctx context.Context, v dyn.Value) (dyn.Value, error){ t.applyJobTranslations, t.applyPipelineTranslations, t.applyArtifactTranslations, t.applyDashboardTranslations, } { - v, err = fn(v) + v, err = fn(ctx, v) if err != nil { return dyn.InvalidValue, err } diff --git a/bundle/config/mutator/translate_paths_artifacts.go b/bundle/config/mutator/translate_paths_artifacts.go index 921c00c73..44e034204 100644 --- a/bundle/config/mutator/translate_paths_artifacts.go +++ b/bundle/config/mutator/translate_paths_artifacts.go @@ -1,6 +1,7 @@ package mutator import ( + "context" "fmt" "github.com/databricks/cli/libs/dyn" @@ -27,7 +28,7 @@ func (t *translateContext) artifactRewritePatterns() []artifactRewritePattern { } } -func (t *translateContext) applyArtifactTranslations(v dyn.Value) (dyn.Value, error) { +func (t *translateContext) applyArtifactTranslations(ctx context.Context, v dyn.Value) (dyn.Value, error) { var err error for _, rewritePattern := range t.artifactRewritePatterns() { @@ -38,7 +39,7 @@ func (t *translateContext) applyArtifactTranslations(v dyn.Value) (dyn.Value, er return dyn.InvalidValue, fmt.Errorf("unable to determine directory for artifact %s: %w", key, err) } - return t.rewriteRelativeTo(p, v, rewritePattern.fn, dir, "") + return t.rewriteRelativeTo(ctx, p, v, rewritePattern.fn, dir, "") }) if err != nil { return dyn.InvalidValue, err diff --git a/bundle/config/mutator/translate_paths_dashboards.go b/bundle/config/mutator/translate_paths_dashboards.go index 93822a599..a6aac82e9 100644 --- a/bundle/config/mutator/translate_paths_dashboards.go +++ b/bundle/config/mutator/translate_paths_dashboards.go @@ -1,12 +1,13 @@ package mutator import ( + "context" "fmt" "github.com/databricks/cli/libs/dyn" ) -func (t *translateContext) applyDashboardTranslations(v dyn.Value) (dyn.Value, error) { +func (t *translateContext) applyDashboardTranslations(ctx context.Context, v dyn.Value) (dyn.Value, error) { // Convert the `file_path` field to a local absolute path. // We load the file at this path and use its contents for the dashboard contents. pattern := dyn.NewPattern( @@ -23,6 +24,6 @@ func (t *translateContext) applyDashboardTranslations(v dyn.Value) (dyn.Value, e return dyn.InvalidValue, fmt.Errorf("unable to determine directory for dashboard %s: %w", key, err) } - return t.rewriteRelativeTo(p, v, t.retainLocalAbsoluteFilePath, dir, "") + return t.rewriteRelativeTo(ctx, p, v, t.retainLocalAbsoluteFilePath, dir, "") }) } diff --git a/bundle/config/mutator/translate_paths_jobs.go b/bundle/config/mutator/translate_paths_jobs.go index c29ff0ea9..4165c0c07 100644 --- a/bundle/config/mutator/translate_paths_jobs.go +++ b/bundle/config/mutator/translate_paths_jobs.go @@ -1,6 +1,7 @@ package mutator import ( + "context" "fmt" "slices" @@ -9,7 +10,7 @@ import ( "github.com/databricks/cli/libs/dyn" ) -func (t *translateContext) applyJobTranslations(v dyn.Value) (dyn.Value, error) { +func (t *translateContext) applyJobTranslations(ctx context.Context, v dyn.Value) (dyn.Value, error) { var err error fallback, err := gatherFallbackPaths(v, "jobs") @@ -43,7 +44,7 @@ func (t *translateContext) applyJobTranslations(v dyn.Value) (dyn.Value, error) return dyn.InvalidValue, err } - return t.rewriteRelativeTo(p, v, rewritePatternFn, dir, fallback[key]) + return t.rewriteRelativeTo(ctx, p, v, rewritePatternFn, dir, fallback[key]) }) } diff --git a/bundle/config/mutator/translate_paths_pipelines.go b/bundle/config/mutator/translate_paths_pipelines.go index 71a65e846..584da825a 100644 --- a/bundle/config/mutator/translate_paths_pipelines.go +++ b/bundle/config/mutator/translate_paths_pipelines.go @@ -1,6 +1,7 @@ package mutator import ( + "context" "fmt" "github.com/databricks/cli/libs/dyn" @@ -34,7 +35,7 @@ func (t *translateContext) pipelineRewritePatterns() []pipelineRewritePattern { } } -func (t *translateContext) applyPipelineTranslations(v dyn.Value) (dyn.Value, error) { +func (t *translateContext) applyPipelineTranslations(ctx context.Context, v dyn.Value) (dyn.Value, error) { var err error fallback, err := gatherFallbackPaths(v, "pipelines") @@ -50,7 +51,7 @@ func (t *translateContext) applyPipelineTranslations(v dyn.Value) (dyn.Value, er return dyn.InvalidValue, fmt.Errorf("unable to determine directory for pipeline %s: %w", key, err) } - return t.rewriteRelativeTo(p, v, rewritePattern.fn, dir, fallback[key]) + return t.rewriteRelativeTo(ctx, p, v, rewritePattern.fn, dir, fallback[key]) }) if err != nil { return dyn.InvalidValue, err diff --git a/bundle/config/mutator/translate_paths_test.go b/bundle/config/mutator/translate_paths_test.go index bf6ba15d8..c93470e5b 100644 --- a/bundle/config/mutator/translate_paths_test.go +++ b/bundle/config/mutator/translate_paths_test.go @@ -1002,3 +1002,93 @@ func TestTranslatePathsWithSourceLinkedDeployment(t *testing.T) { b.Config.Resources.Pipelines["pipeline"].Libraries[1].Notebook.Path, ) } + +// TestGetLocalPath contains test cases for the GetLocalPath function. +func TestGetLocalPath(t *testing.T) { + testCases := []struct { + name string + input string + expected string + errMsg string + }{ + { + name: "EmptyPath", + input: "", + expected: "", + errMsg: "path cannot be empty", + }, + { + name: "AbsolutePathUnix", + input: "/usr/local/bin", + expected: "", + errMsg: "", + }, + { + name: "AbsolutePathWindows", + input: `C:\Program Files\`, + expected: ``, + errMsg: "", + }, + { + name: "RelativePath", + input: "./local/path", + expected: "root/src/local/path", + errMsg: "", + }, + { + name: "NestedRelativePath", + input: "../relative/path", + expected: "root/relative/path", + errMsg: "", + }, + { + name: "PathWithSpaces", + input: "path/with spaces and slash/", + expected: "root/src/path/with spaces and slash", + errMsg: "", + }, + { + name: "PathWithSpecialChars", + input: "path/with/@#$%^&*()!", + expected: "root/src/path/with/@#$%^&*()!", + errMsg: "", + }, + { + name: "DBFS path", + input: "dbfs:/some/path", + expected: "", + errMsg: "", + }, + { + name: "PathTraversal", + input: "path/with/../../../traversal", + expected: "root/traversal", + errMsg: "", + }, + { + name: "RelativeOutOfBundle", + input: "../../outside", + expected: "", + errMsg: "path '../../outside' is not contained in sync root path", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + b := &bundle.Bundle{ + SyncRootPath: "root", + } + ctx := context.Background() + localPath, _, err := mutator.GetLocalPath(ctx, b, "root/src", tc.input) + if tc.errMsg != "" { + require.Error(t, err) + assert.Contains(t, err.Error(), tc.errMsg) + } else { + require.NoError(t, err) + // On Windows, filepath.Join may return backslashes. Normalize to forward slashes for comparison. + normalizedResult := filepath.ToSlash(localPath) + assert.Equal(t, tc.expected, normalizedResult, "For test case: %s", tc.name) + } + }) + } +}