From b3c044c461be0067ff355973a65d1c1a0e6b5db0 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Wed, 3 Jul 2024 12:13:22 +0200 Subject: [PATCH] Use `vfs.Path` for filesystem interaction (#1554) ## Changes Note: this doesn't cover _all_ filesystem interaction. To intercept calls where read or stat files to determine their type, we need a layer between our code and the `os` package calls that interact with the local file system. Interception is necessary to accommodate differences between a regular local file system and the FUSE-mounted Workspace File System when running the CLI on DBR. This change makes use of #1452 in the bundle struct. It uses #1525 to access the bundle variable in path rewriting. ## Tests * Unit tests pass. * Integration tests pass. --- bundle/bundle.go | 12 ++++-- bundle/bundle_read_only.go | 5 +++ bundle/config/mutator/load_git_details.go | 3 +- bundle/config/mutator/translate_paths.go | 7 ++-- bundle/config/mutator/translate_paths_test.go | 40 +++++++++++++------ .../config/validate/validate_sync_patterns.go | 3 +- bundle/deploy/files/sync.go | 3 +- bundle/deploy/state.go | 10 ++--- bundle/deploy/state_pull.go | 2 +- bundle/deploy/state_pull_test.go | 5 ++- bundle/deploy/state_test.go | 5 ++- cmd/sync/sync_test.go | 4 +- 12 files changed, 61 insertions(+), 38 deletions(-) diff --git a/bundle/bundle.go b/bundle/bundle.go index 482614b9..032d98ab 100644 --- a/bundle/bundle.go +++ b/bundle/bundle.go @@ -17,7 +17,6 @@ import ( "github.com/databricks/cli/bundle/env" "github.com/databricks/cli/bundle/metadata" "github.com/databricks/cli/libs/fileset" - "github.com/databricks/cli/libs/folders" "github.com/databricks/cli/libs/git" "github.com/databricks/cli/libs/locker" "github.com/databricks/cli/libs/log" @@ -36,6 +35,10 @@ type Bundle struct { // It is set when we instantiate a new bundle instance. RootPath string + // BundleRoot is a virtual filesystem path to the root of the bundle. + // Exclusively use this field for filesystem operations. + BundleRoot vfs.Path + Config config.Root // Metadata about the bundle deployment. This is the interface Databricks services @@ -73,7 +76,8 @@ type Bundle struct { func Load(ctx context.Context, path string) (*Bundle, error) { b := &Bundle{ - RootPath: filepath.Clean(path), + RootPath: filepath.Clean(path), + BundleRoot: vfs.MustNew(path), } configFile, err := config.FileNames.FindInPath(path) if err != nil { @@ -208,12 +212,12 @@ func (b *Bundle) GetSyncIncludePatterns(ctx context.Context) ([]string, error) { } func (b *Bundle) GitRepository() (*git.Repository, error) { - rootPath, err := folders.FindDirWithLeaf(b.RootPath, ".git") + _, err := vfs.FindLeafInTree(b.BundleRoot, ".git") if err != nil { return nil, fmt.Errorf("unable to locate repository root: %w", err) } - return git.NewRepository(vfs.MustNew(rootPath)) + return git.NewRepository(b.BundleRoot) } // AuthEnv returns a map with environment variables and their values diff --git a/bundle/bundle_read_only.go b/bundle/bundle_read_only.go index e4a4f993..59084f2a 100644 --- a/bundle/bundle_read_only.go +++ b/bundle/bundle_read_only.go @@ -4,6 +4,7 @@ import ( "context" "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/libs/vfs" "github.com/databricks/databricks-sdk-go" ) @@ -23,6 +24,10 @@ func (r ReadOnlyBundle) RootPath() string { return r.b.RootPath } +func (r ReadOnlyBundle) BundleRoot() vfs.Path { + return r.b.BundleRoot +} + func (r ReadOnlyBundle) WorkspaceClient() *databricks.WorkspaceClient { return r.b.WorkspaceClient() } diff --git a/bundle/config/mutator/load_git_details.go b/bundle/config/mutator/load_git_details.go index d8b76f39..9b1c963c 100644 --- a/bundle/config/mutator/load_git_details.go +++ b/bundle/config/mutator/load_git_details.go @@ -8,7 +8,6 @@ import ( "github.com/databricks/cli/libs/diag" "github.com/databricks/cli/libs/git" "github.com/databricks/cli/libs/log" - "github.com/databricks/cli/libs/vfs" ) type loadGitDetails struct{} @@ -23,7 +22,7 @@ func (m *loadGitDetails) Name() string { func (m *loadGitDetails) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { // Load relevant git repository - repo, err := git.NewRepository(vfs.MustNew(b.RootPath)) + repo, err := git.NewRepository(b.BundleRoot) if err != nil { return diag.FromErr(err) } diff --git a/bundle/config/mutator/translate_paths.go b/bundle/config/mutator/translate_paths.go index 4224eafd..a01d3d6a 100644 --- a/bundle/config/mutator/translate_paths.go +++ b/bundle/config/mutator/translate_paths.go @@ -6,7 +6,6 @@ import ( "fmt" "io/fs" "net/url" - "os" "path" "path/filepath" "strings" @@ -119,7 +118,7 @@ func (t *translateContext) rewritePath( } func (t *translateContext) translateNotebookPath(literal, localFullPath, localRelPath, remotePath string) (string, error) { - nb, _, err := notebook.Detect(localFullPath) + nb, _, err := notebook.DetectWithFS(t.b.BundleRoot, filepath.ToSlash(localRelPath)) if errors.Is(err, fs.ErrNotExist) { return "", fmt.Errorf("notebook %s not found", literal) } @@ -135,7 +134,7 @@ func (t *translateContext) translateNotebookPath(literal, localFullPath, localRe } func (t *translateContext) translateFilePath(literal, localFullPath, localRelPath, remotePath string) (string, error) { - nb, _, err := notebook.Detect(localFullPath) + nb, _, err := notebook.DetectWithFS(t.b.BundleRoot, filepath.ToSlash(localRelPath)) if errors.Is(err, fs.ErrNotExist) { return "", fmt.Errorf("file %s not found", literal) } @@ -149,7 +148,7 @@ func (t *translateContext) translateFilePath(literal, localFullPath, localRelPat } func (t *translateContext) translateDirectoryPath(literal, localFullPath, localRelPath, remotePath string) (string, error) { - info, err := os.Stat(localFullPath) + info, err := t.b.BundleRoot.Stat(filepath.ToSlash(localRelPath)) if err != nil { return "", err } diff --git a/bundle/config/mutator/translate_paths_test.go b/bundle/config/mutator/translate_paths_test.go index 29afb997..8476ee38 100644 --- a/bundle/config/mutator/translate_paths_test.go +++ b/bundle/config/mutator/translate_paths_test.go @@ -12,6 +12,7 @@ import ( "github.com/databricks/cli/bundle/config/mutator" "github.com/databricks/cli/bundle/config/resources" "github.com/databricks/cli/bundle/internal/bundletest" + "github.com/databricks/cli/libs/vfs" "github.com/databricks/databricks-sdk-go/service/compute" "github.com/databricks/databricks-sdk-go/service/jobs" "github.com/databricks/databricks-sdk-go/service/pipelines" @@ -37,7 +38,8 @@ func touchEmptyFile(t *testing.T, path string) { func TestTranslatePathsSkippedWithGitSource(t *testing.T) { dir := t.TempDir() b := &bundle.Bundle{ - RootPath: dir, + RootPath: dir, + BundleRoot: vfs.MustNew(dir), Config: config.Root{ Workspace: config.Workspace{ FilePath: "/bundle", @@ -107,7 +109,8 @@ func TestTranslatePaths(t *testing.T) { touchEmptyFile(t, filepath.Join(dir, "dist", "task.jar")) b := &bundle.Bundle{ - RootPath: dir, + RootPath: dir, + BundleRoot: vfs.MustNew(dir), Config: config.Root{ Workspace: config.Workspace{ FilePath: "/bundle", @@ -274,7 +277,8 @@ func TestTranslatePathsInSubdirectories(t *testing.T) { touchEmptyFile(t, filepath.Join(dir, "job", "my_dbt_project", "dbt_project.yml")) b := &bundle.Bundle{ - RootPath: dir, + RootPath: dir, + BundleRoot: vfs.MustNew(dir), Config: config.Root{ Workspace: config.Workspace{ FilePath: "/bundle", @@ -368,7 +372,8 @@ func TestTranslatePathsOutsideBundleRoot(t *testing.T) { dir := t.TempDir() b := &bundle.Bundle{ - RootPath: dir, + RootPath: dir, + BundleRoot: vfs.MustNew(dir), Config: config.Root{ Workspace: config.Workspace{ FilePath: "/bundle", @@ -401,7 +406,8 @@ func TestJobNotebookDoesNotExistError(t *testing.T) { dir := t.TempDir() b := &bundle.Bundle{ - RootPath: dir, + RootPath: dir, + BundleRoot: vfs.MustNew(dir), Config: config.Root{ Resources: config.Resources{ Jobs: map[string]*resources.Job{ @@ -431,7 +437,8 @@ func TestJobFileDoesNotExistError(t *testing.T) { dir := t.TempDir() b := &bundle.Bundle{ - RootPath: dir, + RootPath: dir, + BundleRoot: vfs.MustNew(dir), Config: config.Root{ Resources: config.Resources{ Jobs: map[string]*resources.Job{ @@ -461,7 +468,8 @@ func TestPipelineNotebookDoesNotExistError(t *testing.T) { dir := t.TempDir() b := &bundle.Bundle{ - RootPath: dir, + RootPath: dir, + BundleRoot: vfs.MustNew(dir), Config: config.Root{ Resources: config.Resources{ Pipelines: map[string]*resources.Pipeline{ @@ -491,7 +499,8 @@ func TestPipelineFileDoesNotExistError(t *testing.T) { dir := t.TempDir() b := &bundle.Bundle{ - RootPath: dir, + RootPath: dir, + BundleRoot: vfs.MustNew(dir), Config: config.Root{ Resources: config.Resources{ Pipelines: map[string]*resources.Pipeline{ @@ -522,7 +531,8 @@ func TestJobSparkPythonTaskWithNotebookSourceError(t *testing.T) { touchNotebookFile(t, filepath.Join(dir, "my_notebook.py")) b := &bundle.Bundle{ - RootPath: dir, + RootPath: dir, + BundleRoot: vfs.MustNew(dir), Config: config.Root{ Workspace: config.Workspace{ FilePath: "/bundle", @@ -556,7 +566,8 @@ func TestJobNotebookTaskWithFileSourceError(t *testing.T) { touchEmptyFile(t, filepath.Join(dir, "my_file.py")) b := &bundle.Bundle{ - RootPath: dir, + RootPath: dir, + BundleRoot: vfs.MustNew(dir), Config: config.Root{ Workspace: config.Workspace{ FilePath: "/bundle", @@ -590,7 +601,8 @@ func TestPipelineNotebookLibraryWithFileSourceError(t *testing.T) { touchEmptyFile(t, filepath.Join(dir, "my_file.py")) b := &bundle.Bundle{ - RootPath: dir, + RootPath: dir, + BundleRoot: vfs.MustNew(dir), Config: config.Root{ Workspace: config.Workspace{ FilePath: "/bundle", @@ -624,7 +636,8 @@ func TestPipelineFileLibraryWithNotebookSourceError(t *testing.T) { touchNotebookFile(t, filepath.Join(dir, "my_notebook.py")) b := &bundle.Bundle{ - RootPath: dir, + RootPath: dir, + BundleRoot: vfs.MustNew(dir), Config: config.Root{ Workspace: config.Workspace{ FilePath: "/bundle", @@ -659,7 +672,8 @@ func TestTranslatePathJobEnvironments(t *testing.T) { touchEmptyFile(t, filepath.Join(dir, "env2.py")) b := &bundle.Bundle{ - RootPath: dir, + RootPath: dir, + BundleRoot: vfs.MustNew(dir), Config: config.Root{ Resources: config.Resources{ Jobs: map[string]*resources.Job{ diff --git a/bundle/config/validate/validate_sync_patterns.go b/bundle/config/validate/validate_sync_patterns.go index 832efede..a04c1077 100644 --- a/bundle/config/validate/validate_sync_patterns.go +++ b/bundle/config/validate/validate_sync_patterns.go @@ -8,7 +8,6 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/libs/diag" "github.com/databricks/cli/libs/fileset" - "github.com/databricks/cli/libs/vfs" "golang.org/x/sync/errgroup" ) @@ -51,7 +50,7 @@ func checkPatterns(patterns []string, path string, rb bundle.ReadOnlyBundle) (di index := i p := pattern errs.Go(func() error { - fs, err := fileset.NewGlobSet(vfs.MustNew(rb.RootPath()), []string{p}) + fs, err := fileset.NewGlobSet(rb.BundleRoot(), []string{p}) if err != nil { return err } diff --git a/bundle/deploy/files/sync.go b/bundle/deploy/files/sync.go index 8d6efdae..a308668d 100644 --- a/bundle/deploy/files/sync.go +++ b/bundle/deploy/files/sync.go @@ -6,7 +6,6 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/libs/sync" - "github.com/databricks/cli/libs/vfs" ) func GetSync(ctx context.Context, rb bundle.ReadOnlyBundle) (*sync.Sync, error) { @@ -29,7 +28,7 @@ func GetSyncOptions(ctx context.Context, rb bundle.ReadOnlyBundle) (*sync.SyncOp } opts := &sync.SyncOptions{ - LocalPath: vfs.MustNew(rb.RootPath()), + LocalPath: rb.BundleRoot(), RemotePath: rb.Config().Workspace.FilePath, Include: includes, Exclude: rb.Config().Sync.Exclude, diff --git a/bundle/deploy/state.go b/bundle/deploy/state.go index ccff64fe..97048811 100644 --- a/bundle/deploy/state.go +++ b/bundle/deploy/state.go @@ -6,7 +6,6 @@ import ( "fmt" "io" "io/fs" - "os" "path/filepath" "time" @@ -59,8 +58,8 @@ type entry struct { info fs.FileInfo } -func newEntry(path string) *entry { - info, err := os.Stat(path) +func newEntry(root vfs.Path, path string) *entry { + info, err := root.Stat(path) if err != nil { return &entry{path, nil} } @@ -111,11 +110,10 @@ func FromSlice(files []fileset.File) (Filelist, error) { return f, nil } -func (f Filelist) ToSlice(basePath string) []fileset.File { +func (f Filelist) ToSlice(root vfs.Path) []fileset.File { var files []fileset.File - root := vfs.MustNew(basePath) for _, file := range f { - entry := newEntry(filepath.Join(basePath, file.LocalPath)) + entry := newEntry(root, filepath.ToSlash(file.LocalPath)) // Snapshots created with versions <= v0.220.0 use platform-specific // paths (i.e. with backslashes). Files returned by [libs/fileset] always diff --git a/bundle/deploy/state_pull.go b/bundle/deploy/state_pull.go index 57b38ec6..24ed9d36 100644 --- a/bundle/deploy/state_pull.go +++ b/bundle/deploy/state_pull.go @@ -85,7 +85,7 @@ func (s *statePull) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostic } log.Infof(ctx, "Creating new snapshot") - snapshot, err := sync.NewSnapshot(state.Files.ToSlice(b.RootPath), opts) + snapshot, err := sync.NewSnapshot(state.Files.ToSlice(b.BundleRoot), opts) if err != nil { return diag.FromErr(err) } diff --git a/bundle/deploy/state_pull_test.go b/bundle/deploy/state_pull_test.go index 409895a2..38f0b402 100644 --- a/bundle/deploy/state_pull_test.go +++ b/bundle/deploy/state_pull_test.go @@ -17,6 +17,7 @@ import ( "github.com/databricks/cli/internal/testutil" "github.com/databricks/cli/libs/filer" "github.com/databricks/cli/libs/sync" + "github.com/databricks/cli/libs/vfs" "github.com/databricks/databricks-sdk-go/service/iam" "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" @@ -59,8 +60,10 @@ func testStatePull(t *testing.T, opts statePullOpts) { return f, nil }} + tmpDir := t.TempDir() b := &bundle.Bundle{ - RootPath: t.TempDir(), + RootPath: tmpDir, + BundleRoot: vfs.MustNew(tmpDir), Config: config.Root{ Bundle: config.Bundle{ Target: "default", diff --git a/bundle/deploy/state_test.go b/bundle/deploy/state_test.go index efa051ab..5e1e5423 100644 --- a/bundle/deploy/state_test.go +++ b/bundle/deploy/state_test.go @@ -32,7 +32,8 @@ func TestFromSlice(t *testing.T) { func TestToSlice(t *testing.T) { tmpDir := t.TempDir() - fileset := fileset.New(vfs.MustNew(tmpDir)) + root := vfs.MustNew(tmpDir) + fileset := fileset.New(root) testutil.Touch(t, tmpDir, "test1.py") testutil.Touch(t, tmpDir, "test2.py") testutil.Touch(t, tmpDir, "test3.py") @@ -44,7 +45,7 @@ func TestToSlice(t *testing.T) { require.NoError(t, err) require.Len(t, f, 3) - s := f.ToSlice(tmpDir) + s := f.ToSlice(root) require.Len(t, s, 3) for _, file := range s { diff --git a/cmd/sync/sync_test.go b/cmd/sync/sync_test.go index b741e7b1..564aeae5 100644 --- a/cmd/sync/sync_test.go +++ b/cmd/sync/sync_test.go @@ -9,6 +9,7 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/libs/vfs" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -16,7 +17,8 @@ import ( func TestSyncOptionsFromBundle(t *testing.T) { tempDir := t.TempDir() b := &bundle.Bundle{ - RootPath: tempDir, + RootPath: tempDir, + BundleRoot: vfs.MustNew(tempDir), Config: config.Root{ Bundle: config.Bundle{ Target: "default",