From 100a0516d43198b6421268b68dc65a61fe45c3e6 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Tue, 25 Jun 2024 12:04:22 +0200 Subject: [PATCH] Add context type and value to path rewriting (#1525) ## Changes For a future change where the inner rewriting functions need access to the underlying bundle, this change makes preparations. All values were passed via the stack before and adding yet another value would make the code less readable. ## Tests Unit tests pass. --- bundle/config/mutator/translate_paths.go | 69 +++++++++++-------- .../mutator/translate_paths_artifacts.go | 28 +++++--- bundle/config/mutator/translate_paths_jobs.go | 63 +++++++++-------- .../mutator/translate_paths_pipelines.go | 38 ++++++---- 4 files changed, 115 insertions(+), 83 deletions(-) diff --git a/bundle/config/mutator/translate_paths.go b/bundle/config/mutator/translate_paths.go index d9ab9e9e8..4224eafd4 100644 --- a/bundle/config/mutator/translate_paths.go +++ b/bundle/config/mutator/translate_paths.go @@ -33,9 +33,7 @@ func (err ErrIsNotNotebook) Error() string { return fmt.Sprintf("file at %s is not a notebook", err.path) } -type translatePaths struct { - seen map[string]string -} +type translatePaths struct{} // TranslatePaths converts paths to local notebook files into paths in the workspace file system. func TranslatePaths() bundle.Mutator { @@ -48,6 +46,18 @@ func (m *translatePaths) Name() string { type rewriteFunc func(literal, localFullPath, localRelPath, remotePath string) (string, error) +// translateContext is a context for rewriting paths in a config. +// It is freshly instantiated on every mutator apply call. +// It provides access to the underlying bundle object such that +// it doesn't have to be passed around explicitly. +type translateContext struct { + b *bundle.Bundle + + // seen is a map of local paths to their corresponding remote paths. + // If a local path has already been successfully resolved, we do not need to resolve it again. + seen map[string]string +} + // rewritePath converts a given relative path from the loaded config to a new path based on the passed rewriting function // // It takes these arguments: @@ -57,14 +67,13 @@ type rewriteFunc func(literal, localFullPath, localRelPath, remotePath string) ( // This logic is different between regular files or notebooks. // // The function returns an error if it is impossible to rewrite the given relative path. -func (m *translatePaths) rewritePath( +func (t *translateContext) rewritePath( dir string, - b *bundle.Bundle, p *string, fn rewriteFunc, ) error { // We assume absolute paths point to a location in the workspace - if path.IsAbs(filepath.ToSlash(*p)) { + if path.IsAbs(*p) { return nil } @@ -80,13 +89,14 @@ func (m *translatePaths) rewritePath( // Local path is relative to the directory the resource was defined in. localPath := filepath.Join(dir, filepath.FromSlash(*p)) - if interp, ok := m.seen[localPath]; ok { + if interp, ok := t.seen[localPath]; ok { *p = interp return nil } - // Remote path must be relative to the bundle root. - localRelPath, err := filepath.Rel(b.RootPath, localPath) + // Local path must be contained in the bundle root. + // If it isn't, it won't be synchronized into the workspace. + localRelPath, err := filepath.Rel(t.b.RootPath, localPath) if err != nil { return err } @@ -95,20 +105,20 @@ func (m *translatePaths) rewritePath( } // Prefix remote path with its remote root path. - remotePath := path.Join(b.Config.Workspace.FilePath, filepath.ToSlash(localRelPath)) + remotePath := path.Join(t.b.Config.Workspace.FilePath, filepath.ToSlash(localRelPath)) // Convert local path into workspace path via specified function. - interp, err := fn(*p, localPath, localRelPath, filepath.ToSlash(remotePath)) + interp, err := fn(*p, localPath, localRelPath, remotePath) if err != nil { return err } *p = interp - m.seen[localPath] = interp + t.seen[localPath] = interp return nil } -func translateNotebookPath(literal, localFullPath, localRelPath, remotePath string) (string, error) { +func (t *translateContext) translateNotebookPath(literal, localFullPath, localRelPath, remotePath string) (string, error) { nb, _, err := notebook.Detect(localFullPath) if errors.Is(err, fs.ErrNotExist) { return "", fmt.Errorf("notebook %s not found", literal) @@ -124,7 +134,7 @@ func translateNotebookPath(literal, localFullPath, localRelPath, remotePath stri return strings.TrimSuffix(remotePath, filepath.Ext(localFullPath)), nil } -func translateFilePath(literal, localFullPath, localRelPath, remotePath string) (string, error) { +func (t *translateContext) translateFilePath(literal, localFullPath, localRelPath, remotePath string) (string, error) { nb, _, err := notebook.Detect(localFullPath) if errors.Is(err, fs.ErrNotExist) { return "", fmt.Errorf("file %s not found", literal) @@ -138,7 +148,7 @@ func translateFilePath(literal, localFullPath, localRelPath, remotePath string) return remotePath, nil } -func translateDirectoryPath(literal, localFullPath, localRelPath, remotePath string) (string, error) { +func (t *translateContext) translateDirectoryPath(literal, localFullPath, localRelPath, remotePath string) (string, error) { info, err := os.Stat(localFullPath) if err != nil { return "", err @@ -149,20 +159,20 @@ func translateDirectoryPath(literal, localFullPath, localRelPath, remotePath str return remotePath, nil } -func translateNoOp(literal, localFullPath, localRelPath, remotePath string) (string, error) { +func (t *translateContext) translateNoOp(literal, localFullPath, localRelPath, remotePath string) (string, error) { return localRelPath, nil } -func translateNoOpWithPrefix(literal, localFullPath, localRelPath, remotePath string) (string, error) { +func (t *translateContext) translateNoOpWithPrefix(literal, localFullPath, localRelPath, remotePath string) (string, error) { if !strings.HasPrefix(localRelPath, ".") { localRelPath = "." + string(filepath.Separator) + localRelPath } return localRelPath, nil } -func (m *translatePaths) rewriteValue(b *bundle.Bundle, p dyn.Path, v dyn.Value, fn rewriteFunc, dir string) (dyn.Value, error) { +func (t *translateContext) rewriteValue(p dyn.Path, v dyn.Value, fn rewriteFunc, dir string) (dyn.Value, error) { out := v.MustString() - err := m.rewritePath(dir, b, &out, fn) + err := t.rewritePath(dir, &out, fn) if err != nil { if target := (&ErrIsNotebook{}); errors.As(err, target) { return dyn.InvalidValue, fmt.Errorf(`expected a file for "%s" but got a notebook: %w`, p, target) @@ -176,15 +186,15 @@ func (m *translatePaths) rewriteValue(b *bundle.Bundle, p dyn.Path, v dyn.Value, return dyn.NewValue(out, v.Location()), nil } -func (m *translatePaths) rewriteRelativeTo(b *bundle.Bundle, p dyn.Path, v dyn.Value, fn rewriteFunc, dir, fallback string) (dyn.Value, error) { - nv, err := m.rewriteValue(b, p, v, fn, dir) +func (t *translateContext) rewriteRelativeTo(p dyn.Path, v dyn.Value, fn rewriteFunc, dir, fallback string) (dyn.Value, error) { + nv, err := t.rewriteValue(p, v, fn, dir) if err == nil { return nv, nil } // If we failed to rewrite the path, try to rewrite it relative to the fallback directory. if fallback != "" { - nv, nerr := m.rewriteValue(b, p, v, fn, fallback) + nv, nerr := t.rewriteValue(p, v, fn, fallback) if nerr == nil { // TODO: Emit a warning that this path should be rewritten. return nv, nil @@ -195,16 +205,19 @@ func (m *translatePaths) rewriteRelativeTo(b *bundle.Bundle, p dyn.Path, v dyn.V } func (m *translatePaths) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnostics { - m.seen = make(map[string]string) + t := &translateContext{ + b: b, + seen: make(map[string]string), + } err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { var err error - for _, fn := range []func(*bundle.Bundle, dyn.Value) (dyn.Value, error){ - m.applyJobTranslations, - m.applyPipelineTranslations, - m.applyArtifactTranslations, + for _, fn := range []func(dyn.Value) (dyn.Value, error){ + t.applyJobTranslations, + t.applyPipelineTranslations, + t.applyArtifactTranslations, } { - v, err = fn(b, v) + v, err = fn(v) if err != nil { return dyn.InvalidValue, err } diff --git a/bundle/config/mutator/translate_paths_artifacts.go b/bundle/config/mutator/translate_paths_artifacts.go index 7bda04eec..921c00c73 100644 --- a/bundle/config/mutator/translate_paths_artifacts.go +++ b/bundle/config/mutator/translate_paths_artifacts.go @@ -3,36 +3,42 @@ package mutator import ( "fmt" - "github.com/databricks/cli/bundle" "github.com/databricks/cli/libs/dyn" ) -func (m *translatePaths) applyArtifactTranslations(b *bundle.Bundle, v dyn.Value) (dyn.Value, error) { - var err error +type artifactRewritePattern struct { + pattern dyn.Pattern + fn rewriteFunc +} +func (t *translateContext) artifactRewritePatterns() []artifactRewritePattern { // Base pattern to match all artifacts. base := dyn.NewPattern( dyn.Key("artifacts"), dyn.AnyKey(), ) - for _, t := range []struct { - pattern dyn.Pattern - fn rewriteFunc - }{ + // Compile list of configuration paths to rewrite. + return []artifactRewritePattern{ { base.Append(dyn.Key("path")), - translateNoOp, + t.translateNoOp, }, - } { - v, err = dyn.MapByPattern(v, t.pattern, func(p dyn.Path, v dyn.Value) (dyn.Value, error) { + } +} + +func (t *translateContext) applyArtifactTranslations(v dyn.Value) (dyn.Value, error) { + var err error + + for _, rewritePattern := range t.artifactRewritePatterns() { + v, err = dyn.MapByPattern(v, rewritePattern.pattern, func(p dyn.Path, v dyn.Value) (dyn.Value, error) { key := p[1].Key() dir, err := v.Location().Directory() if err != nil { return dyn.InvalidValue, fmt.Errorf("unable to determine directory for artifact %s: %w", key, err) } - return m.rewriteRelativeTo(b, p, v, t.fn, dir, "") + return t.rewriteRelativeTo(p, v, rewritePattern.fn, dir, "") }) if err != nil { return dyn.InvalidValue, err diff --git a/bundle/config/mutator/translate_paths_jobs.go b/bundle/config/mutator/translate_paths_jobs.go index 58b5e0fb0..60cc8bb9a 100644 --- a/bundle/config/mutator/translate_paths_jobs.go +++ b/bundle/config/mutator/translate_paths_jobs.go @@ -4,7 +4,6 @@ import ( "fmt" "slices" - "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/libraries" "github.com/databricks/cli/libs/dyn" ) @@ -19,55 +18,42 @@ func noSkipRewrite(string) bool { return false } -func rewritePatterns(base dyn.Pattern) []jobRewritePattern { +func rewritePatterns(t *translateContext, base dyn.Pattern) []jobRewritePattern { return []jobRewritePattern{ { base.Append(dyn.Key("notebook_task"), dyn.Key("notebook_path")), - translateNotebookPath, + t.translateNotebookPath, noSkipRewrite, }, { base.Append(dyn.Key("spark_python_task"), dyn.Key("python_file")), - translateFilePath, + t.translateFilePath, noSkipRewrite, }, { base.Append(dyn.Key("dbt_task"), dyn.Key("project_directory")), - translateDirectoryPath, + t.translateDirectoryPath, noSkipRewrite, }, { base.Append(dyn.Key("sql_task"), dyn.Key("file"), dyn.Key("path")), - translateFilePath, + t.translateFilePath, noSkipRewrite, }, { base.Append(dyn.Key("libraries"), dyn.AnyIndex(), dyn.Key("whl")), - translateNoOp, + t.translateNoOp, noSkipRewrite, }, { base.Append(dyn.Key("libraries"), dyn.AnyIndex(), dyn.Key("jar")), - translateNoOp, + t.translateNoOp, noSkipRewrite, }, } } -func (m *translatePaths) applyJobTranslations(b *bundle.Bundle, v dyn.Value) (dyn.Value, error) { - fallback, err := gatherFallbackPaths(v, "jobs") - if err != nil { - return dyn.InvalidValue, err - } - - // Do not translate job task paths if using Git source - var ignore []string - for key, job := range b.Config.Resources.Jobs { - if job.GitSource != nil { - ignore = append(ignore, key) - } - } - +func (t *translateContext) jobRewritePatterns() []jobRewritePattern { // Base pattern to match all tasks in all jobs. base := dyn.NewPattern( dyn.Key("resources"), @@ -90,19 +76,38 @@ func (m *translatePaths) applyJobTranslations(b *bundle.Bundle, v dyn.Value) (dy dyn.Key("dependencies"), dyn.AnyIndex(), ), - translateNoOpWithPrefix, + t.translateNoOpWithPrefix, func(s string) bool { return !libraries.IsEnvironmentDependencyLocal(s) }, }, } - taskPatterns := rewritePatterns(base) - forEachPatterns := rewritePatterns(base.Append(dyn.Key("for_each_task"), dyn.Key("task"))) + + taskPatterns := rewritePatterns(t, base) + forEachPatterns := rewritePatterns(t, base.Append(dyn.Key("for_each_task"), dyn.Key("task"))) allPatterns := append(taskPatterns, jobEnvironmentsPatterns...) allPatterns = append(allPatterns, forEachPatterns...) + return allPatterns +} - for _, t := range allPatterns { - v, err = dyn.MapByPattern(v, t.pattern, func(p dyn.Path, v dyn.Value) (dyn.Value, error) { +func (t *translateContext) applyJobTranslations(v dyn.Value) (dyn.Value, error) { + var err error + + fallback, err := gatherFallbackPaths(v, "jobs") + if err != nil { + return dyn.InvalidValue, err + } + + // Do not translate job task paths if using Git source + var ignore []string + for key, job := range t.b.Config.Resources.Jobs { + if job.GitSource != nil { + ignore = append(ignore, key) + } + } + + for _, rewritePattern := range t.jobRewritePatterns() { + v, err = dyn.MapByPattern(v, rewritePattern.pattern, func(p dyn.Path, v dyn.Value) (dyn.Value, error) { key := p[2].Key() // Skip path translation if the job is using git source. @@ -116,10 +121,10 @@ func (m *translatePaths) applyJobTranslations(b *bundle.Bundle, v dyn.Value) (dy } sv := v.MustString() - if t.skipRewrite(sv) { + if rewritePattern.skipRewrite(sv) { return v, nil } - return m.rewriteRelativeTo(b, p, v, t.fn, dir, fallback[key]) + return t.rewriteRelativeTo(p, v, rewritePattern.fn, dir, fallback[key]) }) if err != nil { return dyn.InvalidValue, err diff --git a/bundle/config/mutator/translate_paths_pipelines.go b/bundle/config/mutator/translate_paths_pipelines.go index 5b2a2c346..71a65e846 100644 --- a/bundle/config/mutator/translate_paths_pipelines.go +++ b/bundle/config/mutator/translate_paths_pipelines.go @@ -3,16 +3,15 @@ package mutator import ( "fmt" - "github.com/databricks/cli/bundle" "github.com/databricks/cli/libs/dyn" ) -func (m *translatePaths) applyPipelineTranslations(b *bundle.Bundle, v dyn.Value) (dyn.Value, error) { - fallback, err := gatherFallbackPaths(v, "pipelines") - if err != nil { - return dyn.InvalidValue, err - } +type pipelineRewritePattern struct { + pattern dyn.Pattern + fn rewriteFunc +} +func (t *translateContext) pipelineRewritePatterns() []pipelineRewritePattern { // Base pattern to match all libraries in all pipelines. base := dyn.NewPattern( dyn.Key("resources"), @@ -22,27 +21,36 @@ func (m *translatePaths) applyPipelineTranslations(b *bundle.Bundle, v dyn.Value dyn.AnyIndex(), ) - for _, t := range []struct { - pattern dyn.Pattern - fn rewriteFunc - }{ + // Compile list of configuration paths to rewrite. + return []pipelineRewritePattern{ { base.Append(dyn.Key("notebook"), dyn.Key("path")), - translateNotebookPath, + t.translateNotebookPath, }, { base.Append(dyn.Key("file"), dyn.Key("path")), - translateFilePath, + t.translateFilePath, }, - } { - v, err = dyn.MapByPattern(v, t.pattern, func(p dyn.Path, v dyn.Value) (dyn.Value, error) { + } +} + +func (t *translateContext) applyPipelineTranslations(v dyn.Value) (dyn.Value, error) { + var err error + + fallback, err := gatherFallbackPaths(v, "pipelines") + if err != nil { + return dyn.InvalidValue, err + } + + for _, rewritePattern := range t.pipelineRewritePatterns() { + v, err = dyn.MapByPattern(v, rewritePattern.pattern, func(p dyn.Path, v dyn.Value) (dyn.Value, error) { key := p[2].Key() dir, err := v.Location().Directory() if err != nil { return dyn.InvalidValue, fmt.Errorf("unable to determine directory for pipeline %s: %w", key, err) } - return m.rewriteRelativeTo(b, p, v, t.fn, dir, fallback[key]) + return t.rewriteRelativeTo(p, v, rewritePattern.fn, dir, fallback[key]) }) if err != nil { return dyn.InvalidValue, err