From 424499ec1d56db0ef153b68357bb49257c2fe6aa Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Thu, 30 May 2024 09:41:50 +0200 Subject: [PATCH] Abstract over filesystem interaction with libs/vfs (#1452) ## Changes Introduce `libs/vfs` for an implementation of `fs.FS` and friends that _includes_ the absolute path it is anchored to. This is needed for: 1. Intercepting file operations to inject custom logic (e.g., logging, access control). 2. Traversing directories to find specific leaf directories (e.g., `.git`). 3. Converting virtual paths to OS-native paths. Options 2 and 3 are not possible with the standard `fs.FS` interface. They are needed such that we can provide an instance to the sync package and still detect the containing `.git` directory and convert paths to native paths. This change focuses on making the following packages use `vfs.Path`: * libs/fileset * libs/git * libs/sync All entries returned by `fileset.All` are now slash-separated. This has 2 consequences: * The sync snapshot now always uses slash-separated paths * We don't need to call `filepath.FromSlash` as much as we did ## Tests * All unit tests pass * All integration tests pass * Manually confirmed that a deployment made on Windows by a previous version of the CLI can be deployed by a new version of the CLI while retaining the validity of the local sync snapshot as well as the remote deployment state. --- bundle/bundle.go | 3 +- bundle/config/mutator/load_git_details.go | 3 +- .../config/validate/validate_sync_patterns.go | 3 +- bundle/deploy/files/sync.go | 3 +- bundle/deploy/state.go | 13 ++- bundle/deploy/state_test.go | 23 ++---- cmd/sync/sync.go | 3 +- cmd/sync/sync_test.go | 11 ++- internal/sync_test.go | 6 +- libs/fileset/file.go | 44 ++++++---- libs/fileset/file_test.go | 15 ++-- libs/fileset/fileset.go | 36 ++++---- libs/fileset/glob.go | 19 ++--- libs/fileset/glob_test.go | 82 ++++++------------- libs/git/config.go | 9 +- libs/git/fileset.go | 7 +- libs/git/fileset_test.go | 18 ++-- libs/git/ignore.go | 25 ++++-- libs/git/ignore_test.go | 7 +- libs/git/reference.go | 13 ++- libs/git/reference_test.go | 9 +- libs/git/repository.go | 49 +++++------ libs/git/repository_test.go | 17 ++-- libs/git/view.go | 35 ++++---- libs/git/view_test.go | 29 +++---- libs/notebook/detect.go | 21 +++-- libs/notebook/detect_jupyter.go | 20 +++-- libs/sync/diff.go | 5 +- libs/sync/dirset.go | 5 +- libs/sync/snapshot.go | 5 ++ libs/sync/snapshot_state.go | 32 +++++++- libs/sync/snapshot_state_test.go | 39 +++++++-- libs/sync/snapshot_test.go | 13 +-- libs/sync/sync.go | 6 +- libs/sync/sync_test.go | 23 +++--- libs/sync/watchdog.go | 4 +- libs/vfs/leaf.go | 29 +++++++ libs/vfs/leaf_test.go | 38 +++++++++ libs/vfs/os.go | 82 +++++++++++++++++++ libs/vfs/os_test.go | 54 ++++++++++++ libs/vfs/path.go | 29 +++++++ libs/vfs/path_test.go | 1 + 42 files changed, 603 insertions(+), 285 deletions(-) create mode 100644 libs/vfs/leaf.go create mode 100644 libs/vfs/leaf_test.go create mode 100644 libs/vfs/os.go create mode 100644 libs/vfs/os_test.go create mode 100644 libs/vfs/path.go create mode 100644 libs/vfs/path_test.go diff --git a/bundle/bundle.go b/bundle/bundle.go index 977ca224..1dc98656 100644 --- a/bundle/bundle.go +++ b/bundle/bundle.go @@ -22,6 +22,7 @@ import ( "github.com/databricks/cli/libs/log" "github.com/databricks/cli/libs/tags" "github.com/databricks/cli/libs/terraform" + "github.com/databricks/cli/libs/vfs" "github.com/databricks/databricks-sdk-go" sdkconfig "github.com/databricks/databricks-sdk-go/config" "github.com/hashicorp/terraform-exec/tfexec" @@ -208,7 +209,7 @@ func (b *Bundle) GitRepository() (*git.Repository, error) { return nil, fmt.Errorf("unable to locate repository root: %w", err) } - return git.NewRepository(rootPath) + return git.NewRepository(vfs.MustNew(rootPath)) } // AuthEnv returns a map with environment variables and their values diff --git a/bundle/config/mutator/load_git_details.go b/bundle/config/mutator/load_git_details.go index 7ce8476f..d8b76f39 100644 --- a/bundle/config/mutator/load_git_details.go +++ b/bundle/config/mutator/load_git_details.go @@ -8,6 +8,7 @@ import ( "github.com/databricks/cli/libs/diag" "github.com/databricks/cli/libs/git" "github.com/databricks/cli/libs/log" + "github.com/databricks/cli/libs/vfs" ) type loadGitDetails struct{} @@ -22,7 +23,7 @@ func (m *loadGitDetails) Name() string { func (m *loadGitDetails) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { // Load relevant git repository - repo, err := git.NewRepository(b.RootPath) + repo, err := git.NewRepository(vfs.MustNew(b.RootPath)) if err != nil { return diag.FromErr(err) } diff --git a/bundle/config/validate/validate_sync_patterns.go b/bundle/config/validate/validate_sync_patterns.go index 58acf6ae..832efede 100644 --- a/bundle/config/validate/validate_sync_patterns.go +++ b/bundle/config/validate/validate_sync_patterns.go @@ -8,6 +8,7 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/libs/diag" "github.com/databricks/cli/libs/fileset" + "github.com/databricks/cli/libs/vfs" "golang.org/x/sync/errgroup" ) @@ -50,7 +51,7 @@ func checkPatterns(patterns []string, path string, rb bundle.ReadOnlyBundle) (di index := i p := pattern errs.Go(func() error { - fs, err := fileset.NewGlobSet(rb.RootPath(), []string{p}) + fs, err := fileset.NewGlobSet(vfs.MustNew(rb.RootPath()), []string{p}) if err != nil { return err } diff --git a/bundle/deploy/files/sync.go b/bundle/deploy/files/sync.go index d78ab2d7..8d6efdae 100644 --- a/bundle/deploy/files/sync.go +++ b/bundle/deploy/files/sync.go @@ -6,6 +6,7 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/libs/sync" + "github.com/databricks/cli/libs/vfs" ) func GetSync(ctx context.Context, rb bundle.ReadOnlyBundle) (*sync.Sync, error) { @@ -28,7 +29,7 @@ func GetSyncOptions(ctx context.Context, rb bundle.ReadOnlyBundle) (*sync.SyncOp } opts := &sync.SyncOptions{ - LocalPath: rb.RootPath(), + LocalPath: vfs.MustNew(rb.RootPath()), RemotePath: rb.Config().Workspace.FilePath, Include: includes, Exclude: rb.Config().Sync.Exclude, diff --git a/bundle/deploy/state.go b/bundle/deploy/state.go index ffcadc9d..ccff64fe 100644 --- a/bundle/deploy/state.go +++ b/bundle/deploy/state.go @@ -12,6 +12,7 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/libs/fileset" + "github.com/databricks/cli/libs/vfs" ) const DeploymentStateFileName = "deployment.json" @@ -112,12 +113,18 @@ func FromSlice(files []fileset.File) (Filelist, error) { func (f Filelist) ToSlice(basePath string) []fileset.File { var files []fileset.File + root := vfs.MustNew(basePath) for _, file := range f { - absPath := filepath.Join(basePath, file.LocalPath) + entry := newEntry(filepath.Join(basePath, file.LocalPath)) + + // Snapshots created with versions <= v0.220.0 use platform-specific + // paths (i.e. with backslashes). Files returned by [libs/fileset] always + // contain forward slashes after this version. Normalize before using. + relative := filepath.ToSlash(file.LocalPath) if file.IsNotebook { - files = append(files, fileset.NewNotebookFile(newEntry(absPath), absPath, file.LocalPath)) + files = append(files, fileset.NewNotebookFile(root, entry, relative)) } else { - files = append(files, fileset.NewSourceFile(newEntry(absPath), absPath, file.LocalPath)) + files = append(files, fileset.NewSourceFile(root, entry, relative)) } } return files diff --git a/bundle/deploy/state_test.go b/bundle/deploy/state_test.go index 15bdc96b..efa051ab 100644 --- a/bundle/deploy/state_test.go +++ b/bundle/deploy/state_test.go @@ -3,17 +3,17 @@ package deploy import ( "bytes" "encoding/json" - "path/filepath" "testing" "github.com/databricks/cli/internal/testutil" "github.com/databricks/cli/libs/fileset" + "github.com/databricks/cli/libs/vfs" "github.com/stretchr/testify/require" ) func TestFromSlice(t *testing.T) { tmpDir := t.TempDir() - fileset := fileset.New(tmpDir) + fileset := fileset.New(vfs.MustNew(tmpDir)) testutil.Touch(t, tmpDir, "test1.py") testutil.Touch(t, tmpDir, "test2.py") testutil.Touch(t, tmpDir, "test3.py") @@ -32,7 +32,7 @@ func TestFromSlice(t *testing.T) { func TestToSlice(t *testing.T) { tmpDir := t.TempDir() - fileset := fileset.New(tmpDir) + fileset := fileset.New(vfs.MustNew(tmpDir)) testutil.Touch(t, tmpDir, "test1.py") testutil.Touch(t, tmpDir, "test2.py") testutil.Touch(t, tmpDir, "test3.py") @@ -48,18 +48,11 @@ func TestToSlice(t *testing.T) { require.Len(t, s, 3) for _, file := range s { - require.Contains(t, []string{"test1.py", "test2.py", "test3.py"}, file.Name()) - require.Contains(t, []string{ - filepath.Join(tmpDir, "test1.py"), - filepath.Join(tmpDir, "test2.py"), - filepath.Join(tmpDir, "test3.py"), - }, file.Absolute) - require.False(t, file.IsDir()) - require.NotZero(t, file.Type()) - info, err := file.Info() - require.NoError(t, err) - require.NotNil(t, info) - require.Equal(t, file.Name(), info.Name()) + require.Contains(t, []string{"test1.py", "test2.py", "test3.py"}, file.Relative) + + // If the mtime is not zero we know we produced a valid fs.DirEntry. + ts := file.Modified() + require.NotZero(t, ts) } } diff --git a/cmd/sync/sync.go b/cmd/sync/sync.go index 42550722..e5f1bfc9 100644 --- a/cmd/sync/sync.go +++ b/cmd/sync/sync.go @@ -14,6 +14,7 @@ import ( "github.com/databricks/cli/cmd/root" "github.com/databricks/cli/libs/flags" "github.com/databricks/cli/libs/sync" + "github.com/databricks/cli/libs/vfs" "github.com/spf13/cobra" ) @@ -46,7 +47,7 @@ func (f *syncFlags) syncOptionsFromArgs(cmd *cobra.Command, args []string) (*syn } opts := sync.SyncOptions{ - LocalPath: args[0], + LocalPath: vfs.MustNew(args[0]), RemotePath: args[1], Full: f.full, PollInterval: f.interval, diff --git a/cmd/sync/sync_test.go b/cmd/sync/sync_test.go index 026d840f..b741e7b1 100644 --- a/cmd/sync/sync_test.go +++ b/cmd/sync/sync_test.go @@ -31,7 +31,7 @@ func TestSyncOptionsFromBundle(t *testing.T) { f := syncFlags{} opts, err := f.syncOptionsFromBundle(New(), []string{}, b) require.NoError(t, err) - assert.Equal(t, tempDir, opts.LocalPath) + assert.Equal(t, tempDir, opts.LocalPath.Native()) assert.Equal(t, "/Users/jane@doe.com/path", opts.RemotePath) assert.Equal(t, filepath.Join(tempDir, ".databricks", "bundle", "default"), opts.SnapshotBasePath) assert.NotNil(t, opts.WorkspaceClient) @@ -49,11 +49,14 @@ func TestSyncOptionsFromArgsRequiredTwoArgs(t *testing.T) { } func TestSyncOptionsFromArgs(t *testing.T) { + local := t.TempDir() + remote := "/remote" + f := syncFlags{} cmd := New() cmd.SetContext(root.SetWorkspaceClient(context.Background(), nil)) - opts, err := f.syncOptionsFromArgs(cmd, []string{"/local", "/remote"}) + opts, err := f.syncOptionsFromArgs(cmd, []string{local, remote}) require.NoError(t, err) - assert.Equal(t, "/local", opts.LocalPath) - assert.Equal(t, "/remote", opts.RemotePath) + assert.Equal(t, local, opts.LocalPath.Native()) + assert.Equal(t, remote, opts.RemotePath) } diff --git a/internal/sync_test.go b/internal/sync_test.go index f970a7ce..4021e649 100644 --- a/internal/sync_test.go +++ b/internal/sync_test.go @@ -313,7 +313,7 @@ func TestAccSyncNestedFolderSync(t *testing.T) { assertSync.remoteDirContent(ctx, "dir1", []string{"dir2"}) assertSync.remoteDirContent(ctx, "dir1/dir2", []string{"dir3"}) assertSync.remoteDirContent(ctx, "dir1/dir2/dir3", []string{"foo.txt"}) - assertSync.snapshotContains(append(repoFiles, ".gitignore", filepath.FromSlash("dir1/dir2/dir3/foo.txt"))) + assertSync.snapshotContains(append(repoFiles, ".gitignore", "dir1/dir2/dir3/foo.txt")) // delete f.Remove(t) @@ -374,7 +374,7 @@ func TestAccSyncNestedSpacePlusAndHashAreEscapedSync(t *testing.T) { assertSync.remoteDirContent(ctx, "dir1", []string{"a b+c"}) assertSync.remoteDirContent(ctx, "dir1/a b+c", []string{"c+d e"}) assertSync.remoteDirContent(ctx, "dir1/a b+c/c+d e", []string{"e+f g#i.txt"}) - assertSync.snapshotContains(append(repoFiles, ".gitignore", filepath.FromSlash("dir1/a b+c/c+d e/e+f g#i.txt"))) + assertSync.snapshotContains(append(repoFiles, ".gitignore", "dir1/a b+c/c+d e/e+f g#i.txt")) // delete f.Remove(t) @@ -404,7 +404,7 @@ func TestAccSyncIncrementalFileOverwritesFolder(t *testing.T) { assertSync.waitForCompletionMarker() assertSync.remoteDirContent(ctx, "", append(repoFiles, ".gitignore", "foo")) assertSync.remoteDirContent(ctx, "foo", []string{"bar.txt"}) - assertSync.snapshotContains(append(repoFiles, ".gitignore", filepath.FromSlash("foo/bar.txt"))) + assertSync.snapshotContains(append(repoFiles, ".gitignore", "foo/bar.txt")) // delete foo/bar.txt f.Remove(t) diff --git a/libs/fileset/file.go b/libs/fileset/file.go index 17cae795..fd846b25 100644 --- a/libs/fileset/file.go +++ b/libs/fileset/file.go @@ -5,6 +5,7 @@ import ( "time" "github.com/databricks/cli/libs/notebook" + "github.com/databricks/cli/libs/vfs" ) type fileType int @@ -16,40 +17,49 @@ const ( ) type File struct { - fs.DirEntry - Absolute, Relative string - fileType fileType + // Root path of the fileset. + root vfs.Path + + // File entry as returned by the [fs.WalkDir] function. + entry fs.DirEntry + + // Type of the file. + fileType fileType + + // Relative path within the fileset. + // Combine with the [vfs.Path] to interact with the underlying file. + Relative string } -func NewNotebookFile(entry fs.DirEntry, absolute string, relative string) File { +func NewNotebookFile(root vfs.Path, entry fs.DirEntry, relative string) File { return File{ - DirEntry: entry, - Absolute: absolute, - Relative: relative, + root: root, + entry: entry, fileType: Notebook, + Relative: relative, } } -func NewFile(entry fs.DirEntry, absolute string, relative string) File { +func NewFile(root vfs.Path, entry fs.DirEntry, relative string) File { return File{ - DirEntry: entry, - Absolute: absolute, - Relative: relative, + root: root, + entry: entry, fileType: Unknown, + Relative: relative, } } -func NewSourceFile(entry fs.DirEntry, absolute string, relative string) File { +func NewSourceFile(root vfs.Path, entry fs.DirEntry, relative string) File { return File{ - DirEntry: entry, - Absolute: absolute, - Relative: relative, + root: root, + entry: entry, fileType: Source, + Relative: relative, } } func (f File) Modified() (ts time.Time) { - info, err := f.Info() + info, err := f.entry.Info() if err != nil { // return default time, beginning of epoch return ts @@ -63,7 +73,7 @@ func (f *File) IsNotebook() (bool, error) { } // Otherwise, detect the notebook type. - isNotebook, _, err := notebook.Detect(f.Absolute) + isNotebook, _, err := notebook.DetectWithFS(f.root, f.Relative) if err != nil { return false, err } diff --git a/libs/fileset/file_test.go b/libs/fileset/file_test.go index cdfc9ba1..1ce1ff59 100644 --- a/libs/fileset/file_test.go +++ b/libs/fileset/file_test.go @@ -1,22 +1,22 @@ package fileset import ( - "path/filepath" "testing" "github.com/databricks/cli/internal/testutil" + "github.com/databricks/cli/libs/vfs" "github.com/stretchr/testify/require" ) func TestNotebookFileIsNotebook(t *testing.T) { - f := NewNotebookFile(nil, "", "") + f := NewNotebookFile(nil, nil, "") isNotebook, err := f.IsNotebook() require.NoError(t, err) require.True(t, isNotebook) } func TestSourceFileIsNotNotebook(t *testing.T) { - f := NewSourceFile(nil, "", "") + f := NewSourceFile(nil, nil, "") isNotebook, err := f.IsNotebook() require.NoError(t, err) require.False(t, isNotebook) @@ -24,18 +24,19 @@ func TestSourceFileIsNotNotebook(t *testing.T) { func TestUnknownFileDetectsNotebook(t *testing.T) { tmpDir := t.TempDir() + root := vfs.MustNew(tmpDir) t.Run("file", func(t *testing.T) { - path := testutil.Touch(t, tmpDir, "test.py") - f := NewFile(nil, path, filepath.Base(path)) + testutil.Touch(t, tmpDir, "test.py") + f := NewFile(root, nil, "test.py") isNotebook, err := f.IsNotebook() require.NoError(t, err) require.False(t, isNotebook) }) t.Run("notebook", func(t *testing.T) { - path := testutil.TouchNotebook(t, tmpDir, "notebook.py") - f := NewFile(nil, path, filepath.Base(path)) + testutil.TouchNotebook(t, tmpDir, "notebook.py") + f := NewFile(root, nil, "notebook.py") isNotebook, err := f.IsNotebook() require.NoError(t, err) require.True(t, isNotebook) diff --git a/libs/fileset/fileset.go b/libs/fileset/fileset.go index 52463dff..d0f00f97 100644 --- a/libs/fileset/fileset.go +++ b/libs/fileset/fileset.go @@ -4,20 +4,24 @@ import ( "fmt" "io/fs" "os" - "path/filepath" + + "github.com/databricks/cli/libs/vfs" ) // FileSet facilitates fast recursive file listing of a path. // It optionally takes into account ignore rules through the [Ignorer] interface. type FileSet struct { - root string + // Root path of the fileset. + root vfs.Path + + // Ignorer interface to check if a file or directory should be ignored. ignore Ignorer } // New returns a [FileSet] for the given root path. -func New(root string) *FileSet { +func New(root vfs.Path) *FileSet { return &FileSet{ - root: filepath.Clean(root), + root: root, ignore: nopIgnorer{}, } } @@ -32,11 +36,6 @@ func (w *FileSet) SetIgnorer(ignore Ignorer) { w.ignore = ignore } -// Return root for fileset. -func (w *FileSet) Root() string { - return w.root -} - // Return all tracked files for Repo func (w *FileSet) All() ([]File, error) { return w.recursiveListFiles() @@ -46,12 +45,7 @@ func (w *FileSet) All() ([]File, error) { // that are being tracked in the FileSet (ie not being ignored for matching one of the // patterns in w.ignore) func (w *FileSet) recursiveListFiles() (fileList []File, err error) { - err = filepath.WalkDir(w.root, func(path string, d fs.DirEntry, err error) error { - if err != nil { - return err - } - - relPath, err := filepath.Rel(w.root, path) + err = fs.WalkDir(w.root, ".", func(name string, d fs.DirEntry, err error) error { if err != nil { return err } @@ -66,25 +60,25 @@ func (w *FileSet) recursiveListFiles() (fileList []File, err error) { } if d.IsDir() { - ign, err := w.ignore.IgnoreDirectory(relPath) + ign, err := w.ignore.IgnoreDirectory(name) if err != nil { - return fmt.Errorf("cannot check if %s should be ignored: %w", relPath, err) + return fmt.Errorf("cannot check if %s should be ignored: %w", name, err) } if ign { - return filepath.SkipDir + return fs.SkipDir } return nil } - ign, err := w.ignore.IgnoreFile(relPath) + ign, err := w.ignore.IgnoreFile(name) if err != nil { - return fmt.Errorf("cannot check if %s should be ignored: %w", relPath, err) + return fmt.Errorf("cannot check if %s should be ignored: %w", name, err) } if ign { return nil } - fileList = append(fileList, NewFile(d, path, relPath)) + fileList = append(fileList, NewFile(w.root, d, name)) return nil }) return diff --git a/libs/fileset/glob.go b/libs/fileset/glob.go index 9d8626e5..0a103847 100644 --- a/libs/fileset/glob.go +++ b/libs/fileset/glob.go @@ -1,22 +1,17 @@ package fileset import ( - "path/filepath" + "path" + + "github.com/databricks/cli/libs/vfs" ) -func NewGlobSet(root string, includes []string) (*FileSet, error) { - absRoot, err := filepath.Abs(root) - if err != nil { - return nil, err - } - +func NewGlobSet(root vfs.Path, includes []string) (*FileSet, error) { for k := range includes { - includes[k] = filepath.ToSlash(filepath.Clean(includes[k])) + includes[k] = path.Clean(includes[k]) } - fs := &FileSet{ - absRoot, - newIncluder(includes), - } + fs := New(root) + fs.SetIgnorer(newIncluder(includes)) return fs, nil } diff --git a/libs/fileset/glob_test.go b/libs/fileset/glob_test.go index e8d3696c..70b9c444 100644 --- a/libs/fileset/glob_test.go +++ b/libs/fileset/glob_test.go @@ -2,21 +2,26 @@ package fileset import ( "io/fs" - "os" - "path/filepath" + "path" "slices" "strings" "testing" + "github.com/databricks/cli/libs/vfs" "github.com/stretchr/testify/require" ) -func TestGlobFileset(t *testing.T) { - cwd, err := os.Getwd() - require.NoError(t, err) - root := filepath.Join(cwd, "..", "filer") +func collectRelativePaths(files []File) []string { + relativePaths := make([]string, 0) + for _, f := range files { + relativePaths = append(relativePaths, f.Relative) + } + return relativePaths +} - entries, err := os.ReadDir(root) +func TestGlobFileset(t *testing.T) { + root := vfs.MustNew("../filer") + entries, err := root.ReadDir(".") require.NoError(t, err) g, err := NewGlobSet(root, []string{ @@ -30,7 +35,7 @@ func TestGlobFileset(t *testing.T) { require.Equal(t, len(files), len(entries)) for _, f := range files { exists := slices.ContainsFunc(entries, func(de fs.DirEntry) bool { - return de.Name() == f.Name() + return de.Name() == path.Base(f.Relative) }) require.True(t, exists) } @@ -46,9 +51,8 @@ func TestGlobFileset(t *testing.T) { } func TestGlobFilesetWithRelativeRoot(t *testing.T) { - root := filepath.Join("..", "filer") - - entries, err := os.ReadDir(root) + root := vfs.MustNew("../filer") + entries, err := root.ReadDir(".") require.NoError(t, err) g, err := NewGlobSet(root, []string{ @@ -58,21 +62,14 @@ func TestGlobFilesetWithRelativeRoot(t *testing.T) { files, err := g.All() require.NoError(t, err) - require.Equal(t, len(files), len(entries)) - for _, f := range files { - require.True(t, filepath.IsAbs(f.Absolute)) - } } func TestGlobFilesetRecursively(t *testing.T) { - cwd, err := os.Getwd() - require.NoError(t, err) - root := filepath.Join(cwd, "..", "git") - + root := vfs.MustNew("../git") entries := make([]string, 0) - err = filepath.Walk(filepath.Join(root, "testdata"), func(path string, info fs.FileInfo, err error) error { - if !info.IsDir() { + err := fs.WalkDir(root, "testdata", func(path string, d fs.DirEntry, err error) error { + if !d.IsDir() { entries = append(entries, path) } return nil @@ -86,24 +83,14 @@ func TestGlobFilesetRecursively(t *testing.T) { files, err := g.All() require.NoError(t, err) - - require.Equal(t, len(files), len(entries)) - for _, f := range files { - exists := slices.ContainsFunc(entries, func(path string) bool { - return path == f.Absolute - }) - require.True(t, exists) - } + require.ElementsMatch(t, entries, collectRelativePaths(files)) } func TestGlobFilesetDir(t *testing.T) { - cwd, err := os.Getwd() - require.NoError(t, err) - root := filepath.Join(cwd, "..", "git") - + root := vfs.MustNew("../git") entries := make([]string, 0) - err = filepath.Walk(filepath.Join(root, "testdata", "a"), func(path string, info fs.FileInfo, err error) error { - if !info.IsDir() { + err := fs.WalkDir(root, "testdata/a", func(path string, d fs.DirEntry, err error) error { + if !d.IsDir() { entries = append(entries, path) } return nil @@ -117,23 +104,13 @@ func TestGlobFilesetDir(t *testing.T) { files, err := g.All() require.NoError(t, err) - - require.Equal(t, len(files), len(entries)) - for _, f := range files { - exists := slices.ContainsFunc(entries, func(path string) bool { - return path == f.Absolute - }) - require.True(t, exists) - } + require.ElementsMatch(t, entries, collectRelativePaths(files)) } func TestGlobFilesetDoubleQuotesWithFilePatterns(t *testing.T) { - cwd, err := os.Getwd() - require.NoError(t, err) - root := filepath.Join(cwd, "..", "git") - + root := vfs.MustNew("../git") entries := make([]string, 0) - err = filepath.Walk(filepath.Join(root, "testdata"), func(path string, info fs.FileInfo, err error) error { + err := fs.WalkDir(root, "testdata", func(path string, d fs.DirEntry, err error) error { if strings.HasSuffix(path, ".txt") { entries = append(entries, path) } @@ -148,12 +125,5 @@ func TestGlobFilesetDoubleQuotesWithFilePatterns(t *testing.T) { files, err := g.All() require.NoError(t, err) - - require.Equal(t, len(files), len(entries)) - for _, f := range files { - exists := slices.ContainsFunc(entries, func(path string) bool { - return path == f.Absolute - }) - require.True(t, exists) - } + require.ElementsMatch(t, entries, collectRelativePaths(files)) } diff --git a/libs/git/config.go b/libs/git/config.go index e83c75b7..424d453b 100644 --- a/libs/git/config.go +++ b/libs/git/config.go @@ -8,6 +8,7 @@ import ( "regexp" "strings" + "github.com/databricks/cli/libs/vfs" "gopkg.in/ini.v1" ) @@ -87,8 +88,8 @@ func (c config) load(r io.Reader) error { return nil } -func (c config) loadFile(path string) error { - f, err := os.Open(path) +func (c config) loadFile(fs vfs.Path, path string) error { + f, err := fs.Open(path) if err != nil { // If the file doesn't exist it is ignored. // This is the case for both global and repository specific config files. @@ -152,8 +153,8 @@ func globalGitConfig() (*config, error) { // > are missing or unreadable they will be ignored. // // We therefore ignore the error return value for the calls below. - config.loadFile(filepath.Join(xdgConfigHome, "git/config")) - config.loadFile(filepath.Join(config.home, ".gitconfig")) + config.loadFile(vfs.MustNew(xdgConfigHome), "git/config") + config.loadFile(vfs.MustNew(config.home), ".gitconfig") return config, nil } diff --git a/libs/git/fileset.go b/libs/git/fileset.go index c604ac7f..f1986aa2 100644 --- a/libs/git/fileset.go +++ b/libs/git/fileset.go @@ -2,6 +2,7 @@ package git import ( "github.com/databricks/cli/libs/fileset" + "github.com/databricks/cli/libs/vfs" ) // FileSet is Git repository aware implementation of [fileset.FileSet]. @@ -13,7 +14,7 @@ type FileSet struct { } // NewFileSet returns [FileSet] for the Git repository located at `root`. -func NewFileSet(root string) (*FileSet, error) { +func NewFileSet(root vfs.Path) (*FileSet, error) { fs := fileset.New(root) v, err := NewView(root) if err != nil { @@ -34,10 +35,6 @@ func (f *FileSet) IgnoreDirectory(dir string) (bool, error) { return f.view.IgnoreDirectory(dir) } -func (f *FileSet) Root() string { - return f.fileset.Root() -} - func (f *FileSet) All() ([]fileset.File, error) { f.view.repo.taintIgnoreRules() return f.fileset.All() diff --git a/libs/git/fileset_test.go b/libs/git/fileset_test.go index 74133f52..4e6172bf 100644 --- a/libs/git/fileset_test.go +++ b/libs/git/fileset_test.go @@ -2,23 +2,25 @@ package git import ( "os" + "path" "path/filepath" "strings" "testing" + "github.com/databricks/cli/libs/vfs" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) -func testFileSetAll(t *testing.T, path string) { - fileSet, err := NewFileSet(path) +func testFileSetAll(t *testing.T, root string) { + fileSet, err := NewFileSet(vfs.MustNew(root)) require.NoError(t, err) files, err := fileSet.All() require.NoError(t, err) require.Len(t, files, 3) - assert.Equal(t, filepath.Join("a", "b", "world.txt"), files[0].Relative) - assert.Equal(t, filepath.Join("a", "hello.txt"), files[1].Relative) - assert.Equal(t, filepath.Join("databricks.yml"), files[2].Relative) + assert.Equal(t, path.Join("a", "b", "world.txt"), files[0].Relative) + assert.Equal(t, path.Join("a", "hello.txt"), files[1].Relative) + assert.Equal(t, path.Join("databricks.yml"), files[2].Relative) } func TestFileSetListAllInRepo(t *testing.T) { @@ -33,7 +35,7 @@ func TestFileSetNonCleanRoot(t *testing.T) { // Test what happens if the root directory can be simplified. // Path simplification is done by most filepath functions. // This should yield the same result as above test. - fileSet, err := NewFileSet("./testdata/../testdata") + fileSet, err := NewFileSet(vfs.MustNew("./testdata/../testdata")) require.NoError(t, err) files, err := fileSet.All() require.NoError(t, err) @@ -42,7 +44,7 @@ func TestFileSetNonCleanRoot(t *testing.T) { func TestFileSetAddsCacheDirToGitIgnore(t *testing.T) { projectDir := t.TempDir() - fileSet, err := NewFileSet(projectDir) + fileSet, err := NewFileSet(vfs.MustNew(projectDir)) require.NoError(t, err) fileSet.EnsureValidGitIgnoreExists() @@ -57,7 +59,7 @@ func TestFileSetDoesNotCacheDirToGitIgnoreIfAlreadyPresent(t *testing.T) { projectDir := t.TempDir() gitIgnorePath := filepath.Join(projectDir, ".gitignore") - fileSet, err := NewFileSet(projectDir) + fileSet, err := NewFileSet(vfs.MustNew(projectDir)) require.NoError(t, err) err = os.WriteFile(gitIgnorePath, []byte(".databricks"), 0o644) require.NoError(t, err) diff --git a/libs/git/ignore.go b/libs/git/ignore.go index ec66a2b2..df3a4e91 100644 --- a/libs/git/ignore.go +++ b/libs/git/ignore.go @@ -1,9 +1,12 @@ package git import ( + "io/fs" "os" + "strings" "time" + "github.com/databricks/cli/libs/vfs" ignore "github.com/sabhiram/go-gitignore" ) @@ -21,7 +24,8 @@ type ignoreRules interface { // ignoreFile represents a gitignore file backed by a path. // If the path doesn't exist (yet), it is treated as an empty file. type ignoreFile struct { - absPath string + root vfs.Path + path string // Signal a reload of this file. // Set this to call [os.Stat] and a potential reload @@ -35,9 +39,10 @@ type ignoreFile struct { patterns *ignore.GitIgnore } -func newIgnoreFile(absPath string) ignoreRules { +func newIgnoreFile(root vfs.Path, path string) ignoreRules { return &ignoreFile{ - absPath: absPath, + root: root, + path: path, checkForReload: true, } } @@ -67,7 +72,7 @@ func (f *ignoreFile) Taint() { func (f *ignoreFile) load() error { // The file must be stat-able. // If it doesn't exist, treat it as an empty file. - stat, err := os.Stat(f.absPath) + stat, err := fs.Stat(f.root, f.path) if err != nil { if os.IsNotExist(err) { return nil @@ -82,7 +87,7 @@ func (f *ignoreFile) load() error { } f.modTime = stat.ModTime() - f.patterns, err = ignore.CompileIgnoreFile(f.absPath) + f.patterns, err = f.loadGitignore() if err != nil { return err } @@ -90,6 +95,16 @@ func (f *ignoreFile) load() error { return nil } +func (f *ignoreFile) loadGitignore() (*ignore.GitIgnore, error) { + data, err := fs.ReadFile(f.root, f.path) + if err != nil { + return nil, err + } + + lines := strings.Split(string(data), "\n") + return ignore.CompileIgnoreLines(lines...), nil +} + // stringIgnoreRules implements the [ignoreRules] interface // for a set of in-memory ignore patterns. type stringIgnoreRules struct { diff --git a/libs/git/ignore_test.go b/libs/git/ignore_test.go index 160f53d7..057c0cb2 100644 --- a/libs/git/ignore_test.go +++ b/libs/git/ignore_test.go @@ -5,6 +5,7 @@ import ( "path/filepath" "testing" + "github.com/databricks/cli/libs/vfs" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -13,7 +14,7 @@ func TestIgnoreFile(t *testing.T) { var ign bool var err error - f := newIgnoreFile("./testdata/.gitignore") + f := newIgnoreFile(vfs.MustNew("testdata"), ".gitignore") ign, err = f.MatchesPath("root.foo") require.NoError(t, err) assert.True(t, ign) @@ -27,7 +28,7 @@ func TestIgnoreFileDoesntExist(t *testing.T) { var err error // Files that don't exist are treated as an empty gitignore file. - f := newIgnoreFile("./testdata/thispathdoesntexist") + f := newIgnoreFile(vfs.MustNew("testdata"), "thispathdoesntexist") ign, err = f.MatchesPath("i'm included") require.NoError(t, err) assert.False(t, ign) @@ -41,7 +42,7 @@ func TestIgnoreFileTaint(t *testing.T) { gitIgnorePath := filepath.Join(tempDir, ".gitignore") // Files that don't exist are treated as an empty gitignore file. - f := newIgnoreFile(gitIgnorePath) + f := newIgnoreFile(vfs.MustNew(tempDir), ".gitignore") ign, err = f.MatchesPath("hello") require.NoError(t, err) assert.False(t, ign) diff --git a/libs/git/reference.go b/libs/git/reference.go index 4021f2e6..2b4bd3e4 100644 --- a/libs/git/reference.go +++ b/libs/git/reference.go @@ -2,10 +2,12 @@ package git import ( "fmt" + "io/fs" "os" - "path/filepath" "regexp" "strings" + + "github.com/databricks/cli/libs/vfs" ) type ReferenceType string @@ -37,9 +39,9 @@ func isSHA1(s string) bool { return re.MatchString(s) } -func LoadReferenceFile(path string) (*Reference, error) { +func LoadReferenceFile(root vfs.Path, path string) (*Reference, error) { // read reference file content - b, err := os.ReadFile(path) + b, err := fs.ReadFile(root, path) if os.IsNotExist(err) { return nil, nil } @@ -73,8 +75,7 @@ func (ref *Reference) ResolvePath() (string, error) { if ref.Type != ReferenceTypePointer { return "", ErrNotAReferencePointer } - refPath := strings.TrimPrefix(ref.Content, ReferencePrefix) - return filepath.FromSlash(refPath), nil + return strings.TrimPrefix(ref.Content, ReferencePrefix), nil } // resolves the name of the current branch from the reference file content. For example @@ -87,8 +88,6 @@ func (ref *Reference) CurrentBranch() (string, error) { if err != nil { return "", err } - // normalize branch ref path to work accross different operating systems - branchRefPath = filepath.ToSlash(branchRefPath) if !strings.HasPrefix(branchRefPath, HeadPathPrefix) { return "", fmt.Errorf("reference path %s does not have expected prefix %s", branchRefPath, HeadPathPrefix) } diff --git a/libs/git/reference_test.go b/libs/git/reference_test.go index 1b08e989..194d7933 100644 --- a/libs/git/reference_test.go +++ b/libs/git/reference_test.go @@ -6,6 +6,7 @@ import ( "strings" "testing" + "github.com/databricks/cli/libs/vfs" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -45,7 +46,7 @@ func TestReferenceReferencePathForReference(t *testing.T) { } path, err := ref.ResolvePath() assert.NoError(t, err) - assert.Equal(t, filepath.FromSlash("refs/heads/my-branch"), path) + assert.Equal(t, "refs/heads/my-branch", path) } func TestReferenceLoadingForObjectID(t *testing.T) { @@ -55,7 +56,7 @@ func TestReferenceLoadingForObjectID(t *testing.T) { defer f.Close() f.WriteString(strings.Repeat("e", 40) + "\r\n") - ref, err := LoadReferenceFile(filepath.Join(tmp, "HEAD")) + ref, err := LoadReferenceFile(vfs.MustNew(tmp), "HEAD") assert.NoError(t, err) assert.Equal(t, ReferenceTypeSHA1, ref.Type) assert.Equal(t, strings.Repeat("e", 40), ref.Content) @@ -68,7 +69,7 @@ func TestReferenceLoadingForReference(t *testing.T) { defer f.Close() f.WriteString("ref: refs/heads/foo\n") - ref, err := LoadReferenceFile(filepath.Join(tmp, "HEAD")) + ref, err := LoadReferenceFile(vfs.MustNew(tmp), "HEAD") assert.NoError(t, err) assert.Equal(t, ReferenceTypePointer, ref.Type) assert.Equal(t, "ref: refs/heads/foo", ref.Content) @@ -81,7 +82,7 @@ func TestReferenceLoadingFailsForInvalidContent(t *testing.T) { defer f.Close() f.WriteString("abc") - _, err = LoadReferenceFile(filepath.Join(tmp, "HEAD")) + _, err = LoadReferenceFile(vfs.MustNew(tmp), "HEAD") assert.ErrorContains(t, err, "unknown format for git HEAD") } diff --git a/libs/git/repository.go b/libs/git/repository.go index 531fd74e..6baf26c2 100644 --- a/libs/git/repository.go +++ b/libs/git/repository.go @@ -7,7 +7,7 @@ import ( "path/filepath" "strings" - "github.com/databricks/cli/libs/folders" + "github.com/databricks/cli/libs/vfs" ) const gitIgnoreFileName = ".gitignore" @@ -21,8 +21,8 @@ type Repository struct { // directory where we process .gitignore files. real bool - // rootPath is the absolute path to the repository root. - rootPath string + // root is the absolute path to the repository root. + root vfs.Path // ignore contains a list of ignore patterns indexed by the // path prefix relative to the repository root. @@ -42,12 +42,12 @@ type Repository struct { // Root returns the absolute path to the repository root. func (r *Repository) Root() string { - return r.rootPath + return r.root.Native() } func (r *Repository) CurrentBranch() (string, error) { // load .git/HEAD - ref, err := LoadReferenceFile(filepath.Join(r.rootPath, GitDirectoryName, "HEAD")) + ref, err := LoadReferenceFile(r.root, path.Join(GitDirectoryName, "HEAD")) if err != nil { return "", err } @@ -64,7 +64,7 @@ func (r *Repository) CurrentBranch() (string, error) { func (r *Repository) LatestCommit() (string, error) { // load .git/HEAD - ref, err := LoadReferenceFile(filepath.Join(r.rootPath, GitDirectoryName, "HEAD")) + ref, err := LoadReferenceFile(r.root, path.Join(GitDirectoryName, "HEAD")) if err != nil { return "", err } @@ -83,7 +83,7 @@ func (r *Repository) LatestCommit() (string, error) { if err != nil { return "", err } - branchHeadRef, err := LoadReferenceFile(filepath.Join(r.rootPath, GitDirectoryName, branchHeadPath)) + branchHeadRef, err := LoadReferenceFile(r.root, path.Join(GitDirectoryName, branchHeadPath)) if err != nil { return "", err } @@ -108,7 +108,7 @@ func (r *Repository) loadConfig() error { if err != nil { return fmt.Errorf("unable to load user specific gitconfig: %w", err) } - err = config.loadFile(filepath.Join(r.rootPath, ".git/config")) + err = config.loadFile(r.root, ".git/config") if err != nil { return fmt.Errorf("unable to load repository specific gitconfig: %w", err) } @@ -119,7 +119,7 @@ func (r *Repository) loadConfig() error { // newIgnoreFile constructs a new [ignoreRules] implementation backed by // a file using the specified path relative to the repository root. func (r *Repository) newIgnoreFile(relativeIgnoreFilePath string) ignoreRules { - return newIgnoreFile(filepath.Join(r.rootPath, relativeIgnoreFilePath)) + return newIgnoreFile(r.root, relativeIgnoreFilePath) } // getIgnoreRules returns a slice of [ignoreRules] that apply @@ -132,7 +132,7 @@ func (r *Repository) getIgnoreRules(prefix string) []ignoreRules { return fs } - r.ignore[prefix] = append(r.ignore[prefix], r.newIgnoreFile(filepath.Join(prefix, gitIgnoreFileName))) + r.ignore[prefix] = append(r.ignore[prefix], r.newIgnoreFile(path.Join(prefix, gitIgnoreFileName))) return r.ignore[prefix] } @@ -149,7 +149,7 @@ func (r *Repository) taintIgnoreRules() { // Ignore computes whether to ignore the specified path. // The specified path is relative to the repository root path. func (r *Repository) Ignore(relPath string) (bool, error) { - parts := strings.Split(filepath.ToSlash(relPath), "/") + parts := strings.Split(relPath, "/") // Retain trailing slash for directory patterns. // We know a trailing slash was present if the last element @@ -186,14 +186,9 @@ func (r *Repository) Ignore(relPath string) (bool, error) { return false, nil } -func NewRepository(path string) (*Repository, error) { - path, err := filepath.Abs(path) - if err != nil { - return nil, err - } - +func NewRepository(path vfs.Path) (*Repository, error) { real := true - rootPath, err := folders.FindDirWithLeaf(path, GitDirectoryName) + rootPath, err := vfs.FindLeafInTree(path, GitDirectoryName) if err != nil { if !os.IsNotExist(err) { return nil, err @@ -205,9 +200,9 @@ func NewRepository(path string) (*Repository, error) { } repo := &Repository{ - real: real, - rootPath: rootPath, - ignore: make(map[string][]ignoreRules), + real: real, + root: rootPath, + ignore: make(map[string][]ignoreRules), } err = repo.loadConfig() @@ -221,13 +216,21 @@ func NewRepository(path string) (*Repository, error) { return nil, fmt.Errorf("unable to access core excludes file: %w", err) } + // Load global excludes on this machine. + // This is by definition a local path so we create a new [vfs.Path] instance. + coreExcludes := newStringIgnoreRules([]string{}) + if coreExcludesPath != "" { + dir := filepath.Dir(coreExcludesPath) + base := filepath.Base(coreExcludesPath) + coreExcludes = newIgnoreFile(vfs.MustNew(dir), base) + } + // Initialize root ignore rules. // These are special and not lazily initialized because: // 1) we include a hardcoded ignore pattern // 2) we include a gitignore file at a non-standard path repo.ignore["."] = []ignoreRules{ - // Load global excludes on this machine. - newIgnoreFile(coreExcludesPath), + coreExcludes, // Always ignore root .git directory. newStringIgnoreRules([]string{ ".git", diff --git a/libs/git/repository_test.go b/libs/git/repository_test.go index fb0e3808..7ddc7ea7 100644 --- a/libs/git/repository_test.go +++ b/libs/git/repository_test.go @@ -7,6 +7,7 @@ import ( "strings" "testing" + "github.com/databricks/cli/libs/vfs" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -43,7 +44,7 @@ func newTestRepository(t *testing.T) *testRepository { _, err = f2.WriteString(`ref: refs/heads/main`) require.NoError(t, err) - repo, err := NewRepository(tmp) + repo, err := NewRepository(vfs.MustNew(tmp)) require.NoError(t, err) return &testRepository{ @@ -53,7 +54,7 @@ func newTestRepository(t *testing.T) *testRepository { } func (testRepo *testRepository) checkoutCommit(commitId string) { - f, err := os.OpenFile(filepath.Join(testRepo.r.rootPath, ".git", "HEAD"), os.O_WRONLY|os.O_TRUNC, os.ModePerm) + f, err := os.OpenFile(filepath.Join(testRepo.r.Root(), ".git", "HEAD"), os.O_WRONLY|os.O_TRUNC, os.ModePerm) require.NoError(testRepo.t, err) defer f.Close() @@ -63,7 +64,7 @@ func (testRepo *testRepository) checkoutCommit(commitId string) { func (testRepo *testRepository) addBranch(name string, latestCommit string) { // create dir for branch head reference - branchDir := filepath.Join(testRepo.r.rootPath, ".git", "refs", "heads") + branchDir := filepath.Join(testRepo.r.Root(), ".git", "refs", "heads") err := os.MkdirAll(branchDir, os.ModePerm) require.NoError(testRepo.t, err) @@ -78,7 +79,7 @@ func (testRepo *testRepository) addBranch(name string, latestCommit string) { } func (testRepo *testRepository) checkoutBranch(name string) { - f, err := os.OpenFile(filepath.Join(testRepo.r.rootPath, ".git", "HEAD"), os.O_WRONLY|os.O_TRUNC, os.ModePerm) + f, err := os.OpenFile(filepath.Join(testRepo.r.Root(), ".git", "HEAD"), os.O_WRONLY|os.O_TRUNC, os.ModePerm) require.NoError(testRepo.t, err) defer f.Close() @@ -89,7 +90,7 @@ func (testRepo *testRepository) checkoutBranch(name string) { // add remote origin url to test repo func (testRepo *testRepository) addOriginUrl(url string) { // open config in append mode - f, err := os.OpenFile(filepath.Join(testRepo.r.rootPath, ".git", "config"), os.O_WRONLY|os.O_APPEND, os.ModePerm) + f, err := os.OpenFile(filepath.Join(testRepo.r.Root(), ".git", "config"), os.O_WRONLY|os.O_APPEND, os.ModePerm) require.NoError(testRepo.t, err) defer f.Close() @@ -128,7 +129,7 @@ func (testRepo *testRepository) assertOriginUrl(expected string) { func TestRepository(t *testing.T) { // Load this repository as test. - repo, err := NewRepository("../..") + repo, err := NewRepository(vfs.MustNew("../..")) tr := testRepository{t, repo} require.NoError(t, err) @@ -142,7 +143,7 @@ func TestRepository(t *testing.T) { assert.True(t, tr.Ignore("vendor/")) // Check that ignores under testdata work. - assert.True(t, tr.Ignore(filepath.Join("libs", "git", "testdata", "root.ignoreme"))) + assert.True(t, tr.Ignore("libs/git/testdata/root.ignoreme")) } func TestRepositoryGitConfigForEmptyRepo(t *testing.T) { @@ -192,7 +193,7 @@ func TestRepositoryGitConfigForSshUrl(t *testing.T) { func TestRepositoryGitConfigWhenNotARepo(t *testing.T) { tmp := t.TempDir() - repo, err := NewRepository(tmp) + repo, err := NewRepository(vfs.MustNew(tmp)) require.NoError(t, err) branch, err := repo.CurrentBranch() diff --git a/libs/git/view.go b/libs/git/view.go index 3cb88d8b..90eed0bb 100644 --- a/libs/git/view.go +++ b/libs/git/view.go @@ -1,9 +1,13 @@ package git import ( + "fmt" "os" + "path" "path/filepath" "strings" + + "github.com/databricks/cli/libs/vfs" ) // View represents a view on a directory tree that takes into account @@ -29,17 +33,15 @@ type View struct { // Ignore computes whether to ignore the specified path. // The specified path is relative to the view's target path. -func (v *View) Ignore(path string) (bool, error) { - path = filepath.ToSlash(path) - +func (v *View) Ignore(relPath string) (bool, error) { // Retain trailing slash for directory patterns. // Needs special handling because it is removed by path cleaning. trailingSlash := "" - if strings.HasSuffix(path, "/") { + if strings.HasSuffix(relPath, "/") { trailingSlash = "/" } - return v.repo.Ignore(filepath.Join(v.targetPath, path) + trailingSlash) + return v.repo.Ignore(path.Join(v.targetPath, relPath) + trailingSlash) } // IgnoreFile returns if the gitignore rules in this fileset @@ -70,26 +72,27 @@ func (v *View) IgnoreDirectory(dir string) (bool, error) { return v.Ignore(dir + "/") } -func NewView(path string) (*View, error) { - path, err := filepath.Abs(path) - if err != nil { - return nil, err - } - - repo, err := NewRepository(path) +func NewView(root vfs.Path) (*View, error) { + repo, err := NewRepository(root) if err != nil { return nil, err } // Target path must be relative to the repository root path. - targetPath, err := filepath.Rel(repo.rootPath, path) - if err != nil { - return nil, err + target := root.Native() + prefix := repo.root.Native() + if !strings.HasPrefix(target, prefix) { + return nil, fmt.Errorf("path %q is not within repository root %q", root.Native(), prefix) } + // Make target a relative path. + target = strings.TrimPrefix(target, prefix) + target = strings.TrimPrefix(target, string(os.PathSeparator)) + target = path.Clean(filepath.ToSlash(target)) + return &View{ repo: repo, - targetPath: targetPath, + targetPath: target, }, nil } diff --git a/libs/git/view_test.go b/libs/git/view_test.go index 3ecd301b..76fba345 100644 --- a/libs/git/view_test.go +++ b/libs/git/view_test.go @@ -7,6 +7,7 @@ import ( "path/filepath" "testing" + "github.com/databricks/cli/libs/vfs" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -89,19 +90,19 @@ func testViewAtRoot(t *testing.T, tv testView) { } func TestViewRootInBricksRepo(t *testing.T) { - v, err := NewView("./testdata") + v, err := NewView(vfs.MustNew("./testdata")) require.NoError(t, err) testViewAtRoot(t, testView{t, v}) } func TestViewRootInTempRepo(t *testing.T) { - v, err := NewView(createFakeRepo(t, "testdata")) + v, err := NewView(vfs.MustNew(createFakeRepo(t, "testdata"))) require.NoError(t, err) testViewAtRoot(t, testView{t, v}) } func TestViewRootInTempDir(t *testing.T) { - v, err := NewView(copyTestdata(t, "testdata")) + v, err := NewView(vfs.MustNew(copyTestdata(t, "testdata"))) require.NoError(t, err) testViewAtRoot(t, testView{t, v}) } @@ -124,20 +125,20 @@ func testViewAtA(t *testing.T, tv testView) { } func TestViewAInBricksRepo(t *testing.T) { - v, err := NewView("./testdata/a") + v, err := NewView(vfs.MustNew("./testdata/a")) require.NoError(t, err) testViewAtA(t, testView{t, v}) } func TestViewAInTempRepo(t *testing.T) { - v, err := NewView(filepath.Join(createFakeRepo(t, "testdata"), "a")) + v, err := NewView(vfs.MustNew(filepath.Join(createFakeRepo(t, "testdata"), "a"))) require.NoError(t, err) testViewAtA(t, testView{t, v}) } func TestViewAInTempDir(t *testing.T) { // Since this is not a fake repo it should not traverse up the tree. - v, err := NewView(filepath.Join(copyTestdata(t, "testdata"), "a")) + v, err := NewView(vfs.MustNew(filepath.Join(copyTestdata(t, "testdata"), "a"))) require.NoError(t, err) tv := testView{t, v} @@ -174,20 +175,20 @@ func testViewAtAB(t *testing.T, tv testView) { } func TestViewABInBricksRepo(t *testing.T) { - v, err := NewView("./testdata/a/b") + v, err := NewView(vfs.MustNew("./testdata/a/b")) require.NoError(t, err) testViewAtAB(t, testView{t, v}) } func TestViewABInTempRepo(t *testing.T) { - v, err := NewView(filepath.Join(createFakeRepo(t, "testdata"), "a", "b")) + v, err := NewView(vfs.MustNew(filepath.Join(createFakeRepo(t, "testdata"), "a", "b"))) require.NoError(t, err) testViewAtAB(t, testView{t, v}) } func TestViewABInTempDir(t *testing.T) { // Since this is not a fake repo it should not traverse up the tree. - v, err := NewView(filepath.Join(copyTestdata(t, "testdata"), "a", "b")) + v, err := NewView(vfs.MustNew(filepath.Join(copyTestdata(t, "testdata"), "a", "b"))) tv := testView{t, v} require.NoError(t, err) @@ -214,7 +215,7 @@ func TestViewDoesNotChangeGitignoreIfCacheDirAlreadyIgnoredAtRoot(t *testing.T) // Since root .gitignore already has .databricks, there should be no edits // to root .gitignore - v, err := NewView(repoPath) + v, err := NewView(vfs.MustNew(repoPath)) require.NoError(t, err) err = v.EnsureValidGitIgnoreExists() @@ -234,7 +235,7 @@ func TestViewDoesNotChangeGitignoreIfCacheDirAlreadyIgnoredInSubdir(t *testing.T // Since root .gitignore already has .databricks, there should be no edits // to a/.gitignore - v, err := NewView(filepath.Join(repoPath, "a")) + v, err := NewView(vfs.MustNew(filepath.Join(repoPath, "a"))) require.NoError(t, err) err = v.EnsureValidGitIgnoreExists() @@ -252,7 +253,7 @@ func TestViewAddsGitignoreWithCacheDir(t *testing.T) { assert.NoError(t, err) // Since root .gitignore was deleted, new view adds .databricks to root .gitignore - v, err := NewView(repoPath) + v, err := NewView(vfs.MustNew(repoPath)) require.NoError(t, err) err = v.EnsureValidGitIgnoreExists() @@ -270,7 +271,7 @@ func TestViewAddsGitignoreWithCacheDirAtSubdir(t *testing.T) { require.NoError(t, err) // Since root .gitignore was deleted, new view adds .databricks to a/.gitignore - v, err := NewView(filepath.Join(repoPath, "a")) + v, err := NewView(vfs.MustNew(filepath.Join(repoPath, "a"))) require.NoError(t, err) err = v.EnsureValidGitIgnoreExists() @@ -287,7 +288,7 @@ func TestViewAddsGitignoreWithCacheDirAtSubdir(t *testing.T) { func TestViewAlwaysIgnoresCacheDir(t *testing.T) { repoPath := createFakeRepo(t, "testdata") - v, err := NewView(repoPath) + v, err := NewView(vfs.MustNew(repoPath)) require.NoError(t, err) err = v.EnsureValidGitIgnoreExists() diff --git a/libs/notebook/detect.go b/libs/notebook/detect.go index 17685f3b..0b7c04d6 100644 --- a/libs/notebook/detect.go +++ b/libs/notebook/detect.go @@ -4,6 +4,7 @@ import ( "bufio" "bytes" "io" + "io/fs" "os" "path/filepath" "strings" @@ -15,8 +16,8 @@ import ( const headerLength = 32 // readHeader reads the first N bytes from a file. -func readHeader(path string) ([]byte, error) { - f, err := os.Open(path) +func readHeader(fsys fs.FS, name string) ([]byte, error) { + f, err := fsys.Open(name) if err != nil { return nil, err } @@ -36,10 +37,10 @@ func readHeader(path string) ([]byte, error) { // Detect returns whether the file at path is a Databricks notebook. // If it is, it returns the notebook language. -func Detect(path string) (notebook bool, language workspace.Language, err error) { +func DetectWithFS(fsys fs.FS, name string) (notebook bool, language workspace.Language, err error) { header := "" - buf, err := readHeader(path) + buf, err := readHeader(fsys, name) if err != nil { return false, "", err } @@ -48,7 +49,7 @@ func Detect(path string) (notebook bool, language workspace.Language, err error) fileHeader := scanner.Text() // Determine which header to expect based on filename extension. - ext := strings.ToLower(filepath.Ext(path)) + ext := strings.ToLower(filepath.Ext(name)) switch ext { case ".py": header = `# Databricks notebook source` @@ -63,7 +64,7 @@ func Detect(path string) (notebook bool, language workspace.Language, err error) header = "-- Databricks notebook source" language = workspace.LanguageSql case ".ipynb": - return DetectJupyter(path) + return DetectJupyterWithFS(fsys, name) default: return false, "", nil } @@ -74,3 +75,11 @@ func Detect(path string) (notebook bool, language workspace.Language, err error) return true, language, nil } + +// Detect calls DetectWithFS with the local filesystem. +// The name argument may be a local relative path or a local absolute path. +func Detect(name string) (notebook bool, language workspace.Language, err error) { + d := filepath.ToSlash(filepath.Dir(name)) + b := filepath.Base(name) + return DetectWithFS(os.DirFS(d), b) +} diff --git a/libs/notebook/detect_jupyter.go b/libs/notebook/detect_jupyter.go index 7d96763c..f631b581 100644 --- a/libs/notebook/detect_jupyter.go +++ b/libs/notebook/detect_jupyter.go @@ -3,7 +3,9 @@ package notebook import ( "encoding/json" "fmt" + "io/fs" "os" + "path/filepath" "github.com/databricks/databricks-sdk-go/service/workspace" ) @@ -56,8 +58,8 @@ func resolveLanguage(nb *jupyter) workspace.Language { // DetectJupyter returns whether the file at path is a valid Jupyter notebook. // We assume it is valid if we can read it as JSON and see a couple expected fields. // If we cannot, importing into the workspace will always fail, so we also return an error. -func DetectJupyter(path string) (notebook bool, language workspace.Language, err error) { - f, err := os.Open(path) +func DetectJupyterWithFS(fsys fs.FS, name string) (notebook bool, language workspace.Language, err error) { + f, err := fsys.Open(name) if err != nil { return false, "", err } @@ -68,18 +70,26 @@ func DetectJupyter(path string) (notebook bool, language workspace.Language, err dec := json.NewDecoder(f) err = dec.Decode(&nb) if err != nil { - return false, "", fmt.Errorf("%s: error loading Jupyter notebook file: %w", path, err) + return false, "", fmt.Errorf("%s: error loading Jupyter notebook file: %w", name, err) } // Not a Jupyter notebook if the cells or metadata fields aren't defined. if nb.Cells == nil || nb.Metadata == nil { - return false, "", fmt.Errorf("%s: invalid Jupyter notebook file", path) + return false, "", fmt.Errorf("%s: invalid Jupyter notebook file", name) } // Major version must be at least 4. if nb.NbFormatMajor < 4 { - return false, "", fmt.Errorf("%s: unsupported Jupyter notebook version: %d", path, nb.NbFormatMajor) + return false, "", fmt.Errorf("%s: unsupported Jupyter notebook version: %d", name, nb.NbFormatMajor) } return true, resolveLanguage(&nb), nil } + +// DetectJupyter calls DetectJupyterWithFS with the local filesystem. +// The name argument may be a local relative path or a local absolute path. +func DetectJupyter(name string) (notebook bool, language workspace.Language, err error) { + d := filepath.ToSlash(filepath.Dir(name)) + b := filepath.Base(name) + return DetectJupyterWithFS(os.DirFS(d), b) +} diff --git a/libs/sync/diff.go b/libs/sync/diff.go index 074bfc56..e91f7277 100644 --- a/libs/sync/diff.go +++ b/libs/sync/diff.go @@ -2,7 +2,6 @@ package sync import ( "path" - "path/filepath" "golang.org/x/exp/maps" ) @@ -64,7 +63,7 @@ func (d *diff) addFilesWithRemoteNameChanged(after *SnapshotState, before *Snaps func (d *diff) addNewFiles(after *SnapshotState, before *SnapshotState) { for localName := range after.LastModifiedTimes { if _, ok := before.LastModifiedTimes[localName]; !ok { - d.put = append(d.put, filepath.ToSlash(localName)) + d.put = append(d.put, localName) } } @@ -79,7 +78,7 @@ func (d *diff) addUpdatedFiles(after *SnapshotState, before *SnapshotState) { for localName, modTime := range after.LastModifiedTimes { prevModTime, ok := before.LastModifiedTimes[localName] if ok && modTime.After(prevModTime) { - d.put = append(d.put, filepath.ToSlash(localName)) + d.put = append(d.put, localName) } } } diff --git a/libs/sync/dirset.go b/libs/sync/dirset.go index 3c37c97c..33b85cb8 100644 --- a/libs/sync/dirset.go +++ b/libs/sync/dirset.go @@ -2,7 +2,6 @@ package sync import ( "path" - "path/filepath" "sort" ) @@ -16,8 +15,8 @@ func MakeDirSet(files []string) DirSet { // Iterate over all files. for _, f := range files { - // Get the directory of the file in /-separated form. - dir := filepath.ToSlash(filepath.Dir(f)) + // Get the directory of the file. + dir := path.Dir(f) // Add this directory and its parents until it is either "." or already in the set. for dir != "." { diff --git a/libs/sync/snapshot.go b/libs/sync/snapshot.go index a27a8c84..392e274d 100644 --- a/libs/sync/snapshot.go +++ b/libs/sync/snapshot.go @@ -172,6 +172,11 @@ func loadOrNewSnapshot(ctx context.Context, opts *SyncOptions) (*Snapshot, error return nil, fmt.Errorf("failed to json unmarshal persisted snapshot: %s", err) } + // Ensure that all paths are slash-separated upon loading + // an existing snapshot file. If it was created by an older + // CLI version (<= v0.220.0), it may contain backslashes. + snapshot.SnapshotState = snapshot.SnapshotState.ToSlash() + snapshot.New = false return snapshot, nil } diff --git a/libs/sync/snapshot_state.go b/libs/sync/snapshot_state.go index 10cd34e6..09bb5b63 100644 --- a/libs/sync/snapshot_state.go +++ b/libs/sync/snapshot_state.go @@ -2,6 +2,7 @@ package sync import ( "fmt" + "path" "path/filepath" "strings" "time" @@ -48,7 +49,7 @@ func NewSnapshotState(localFiles []fileset.File) (*SnapshotState, error) { for k := range localFiles { f := &localFiles[k] // Compute the remote name the file will have in WSFS - remoteName := filepath.ToSlash(f.Relative) + remoteName := f.Relative isNotebook, err := f.IsNotebook() if err != nil { @@ -57,7 +58,7 @@ func NewSnapshotState(localFiles []fileset.File) (*SnapshotState, error) { continue } if isNotebook { - ext := filepath.Ext(remoteName) + ext := path.Ext(remoteName) remoteName = strings.TrimSuffix(remoteName, ext) } @@ -119,3 +120,30 @@ func (fs *SnapshotState) validate() error { } return nil } + +// ToSlash ensures all local paths in the snapshot state +// are slash-separated. Returns a new snapshot state. +func (old SnapshotState) ToSlash() *SnapshotState { + new := SnapshotState{ + LastModifiedTimes: make(map[string]time.Time), + LocalToRemoteNames: make(map[string]string), + RemoteToLocalNames: make(map[string]string), + } + + // Keys are local paths. + for k, v := range old.LastModifiedTimes { + new.LastModifiedTimes[filepath.ToSlash(k)] = v + } + + // Keys are local paths. + for k, v := range old.LocalToRemoteNames { + new.LocalToRemoteNames[filepath.ToSlash(k)] = v + } + + // Values are remote paths. + for k, v := range old.RemoteToLocalNames { + new.RemoteToLocalNames[k] = filepath.ToSlash(v) + } + + return &new +} diff --git a/libs/sync/snapshot_state_test.go b/libs/sync/snapshot_state_test.go index bfcdbef6..92c14e8e 100644 --- a/libs/sync/snapshot_state_test.go +++ b/libs/sync/snapshot_state_test.go @@ -1,25 +1,27 @@ package sync import ( + "runtime" "testing" "time" "github.com/databricks/cli/libs/fileset" + "github.com/databricks/cli/libs/vfs" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) func TestSnapshotState(t *testing.T) { - fileSet := fileset.New("./testdata/sync-fileset") + fileSet := fileset.New(vfs.MustNew("./testdata/sync-fileset")) files, err := fileSet.All() require.NoError(t, err) // Assert initial contents of the fileset assert.Len(t, files, 4) - assert.Equal(t, "invalid-nb.ipynb", files[0].Name()) - assert.Equal(t, "my-nb.py", files[1].Name()) - assert.Equal(t, "my-script.py", files[2].Name()) - assert.Equal(t, "valid-nb.ipynb", files[3].Name()) + assert.Equal(t, "invalid-nb.ipynb", files[0].Relative) + assert.Equal(t, "my-nb.py", files[1].Relative) + assert.Equal(t, "my-script.py", files[2].Relative) + assert.Equal(t, "valid-nb.ipynb", files[3].Relative) // Assert snapshot state generated from the fileset. Note that the invalid notebook // has been ignored. @@ -114,3 +116,30 @@ func TestSnapshotStateValidationErrors(t *testing.T) { } assert.EqualError(t, s.validate(), "invalid sync state representation. Inconsistent values found. Remote file c points to a. Local file a points to b") } + +func TestSnapshotStateWithBackslashes(t *testing.T) { + if runtime.GOOS != "windows" { + t.Skip("Skipping test on non-Windows platform") + } + + now := time.Now() + s1 := &SnapshotState{ + LastModifiedTimes: map[string]time.Time{ + "foo\\bar.py": now, + }, + LocalToRemoteNames: map[string]string{ + "foo\\bar.py": "foo/bar", + }, + RemoteToLocalNames: map[string]string{ + "foo/bar": "foo\\bar.py", + }, + } + + assert.NoError(t, s1.validate()) + + s2 := s1.ToSlash() + assert.NoError(t, s1.validate()) + assert.Equal(t, map[string]time.Time{"foo/bar.py": now}, s2.LastModifiedTimes) + assert.Equal(t, map[string]string{"foo/bar.py": "foo/bar"}, s2.LocalToRemoteNames) + assert.Equal(t, map[string]string{"foo/bar": "foo/bar.py"}, s2.RemoteToLocalNames) +} diff --git a/libs/sync/snapshot_test.go b/libs/sync/snapshot_test.go index d6358d4a..050b5d96 100644 --- a/libs/sync/snapshot_test.go +++ b/libs/sync/snapshot_test.go @@ -10,6 +10,7 @@ import ( "github.com/databricks/cli/libs/git" "github.com/databricks/cli/libs/testfile" + "github.com/databricks/cli/libs/vfs" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -29,7 +30,7 @@ func TestDiff(t *testing.T) { // Create temp project dir projectDir := t.TempDir() - fileSet, err := git.NewFileSet(projectDir) + fileSet, err := git.NewFileSet(vfs.MustNew(projectDir)) require.NoError(t, err) state := Snapshot{ SnapshotState: &SnapshotState{ @@ -93,7 +94,7 @@ func TestSymlinkDiff(t *testing.T) { // Create temp project dir projectDir := t.TempDir() - fileSet, err := git.NewFileSet(projectDir) + fileSet, err := git.NewFileSet(vfs.MustNew(projectDir)) require.NoError(t, err) state := Snapshot{ SnapshotState: &SnapshotState{ @@ -124,7 +125,7 @@ func TestFolderDiff(t *testing.T) { // Create temp project dir projectDir := t.TempDir() - fileSet, err := git.NewFileSet(projectDir) + fileSet, err := git.NewFileSet(vfs.MustNew(projectDir)) require.NoError(t, err) state := Snapshot{ SnapshotState: &SnapshotState{ @@ -169,7 +170,7 @@ func TestPythonNotebookDiff(t *testing.T) { // Create temp project dir projectDir := t.TempDir() - fileSet, err := git.NewFileSet(projectDir) + fileSet, err := git.NewFileSet(vfs.MustNew(projectDir)) require.NoError(t, err) state := Snapshot{ SnapshotState: &SnapshotState{ @@ -244,7 +245,7 @@ func TestErrorWhenIdenticalRemoteName(t *testing.T) { // Create temp project dir projectDir := t.TempDir() - fileSet, err := git.NewFileSet(projectDir) + fileSet, err := git.NewFileSet(vfs.MustNew(projectDir)) require.NoError(t, err) state := Snapshot{ SnapshotState: &SnapshotState{ @@ -281,7 +282,7 @@ func TestNoErrorRenameWithIdenticalRemoteName(t *testing.T) { // Create temp project dir projectDir := t.TempDir() - fileSet, err := git.NewFileSet(projectDir) + fileSet, err := git.NewFileSet(vfs.MustNew(projectDir)) require.NoError(t, err) state := Snapshot{ SnapshotState: &SnapshotState{ diff --git a/libs/sync/sync.go b/libs/sync/sync.go index 30b68ccf..585e8a88 100644 --- a/libs/sync/sync.go +++ b/libs/sync/sync.go @@ -10,12 +10,13 @@ import ( "github.com/databricks/cli/libs/git" "github.com/databricks/cli/libs/log" "github.com/databricks/cli/libs/set" + "github.com/databricks/cli/libs/vfs" "github.com/databricks/databricks-sdk-go" "github.com/databricks/databricks-sdk-go/service/iam" ) type SyncOptions struct { - LocalPath string + LocalPath vfs.Path RemotePath string Include []string Exclude []string @@ -54,6 +55,7 @@ func New(ctx context.Context, opts SyncOptions) (*Sync, error) { if err != nil { return nil, err } + err = fileSet.EnsureValidGitIgnoreExists() if err != nil { return nil, err @@ -186,7 +188,7 @@ func (s *Sync) GetFileList(ctx context.Context) ([]fileset.File, error) { // tradeoff: doing portable monitoring only due to macOS max descriptor manual ulimit setting requirement // https://github.com/gorakhargosh/watchdog/blob/master/src/watchdog/observers/kqueue.py#L394-L418 all := set.NewSetF(func(f fileset.File) string { - return f.Absolute + return f.Relative }) gitFiles, err := s.fileSet.All() if err != nil { diff --git a/libs/sync/sync_test.go b/libs/sync/sync_test.go index dc220dbf..292586e8 100644 --- a/libs/sync/sync_test.go +++ b/libs/sync/sync_test.go @@ -8,6 +8,7 @@ import ( "github.com/databricks/cli/libs/fileset" "github.com/databricks/cli/libs/git" + "github.com/databricks/cli/libs/vfs" "github.com/stretchr/testify/require" ) @@ -73,16 +74,17 @@ func TestGetFileSet(t *testing.T) { ctx := context.Background() dir := setupFiles(t) - fileSet, err := git.NewFileSet(dir) + root := vfs.MustNew(dir) + fileSet, err := git.NewFileSet(root) require.NoError(t, err) err = fileSet.EnsureValidGitIgnoreExists() require.NoError(t, err) - inc, err := fileset.NewGlobSet(dir, []string{}) + inc, err := fileset.NewGlobSet(root, []string{}) require.NoError(t, err) - excl, err := fileset.NewGlobSet(dir, []string{}) + excl, err := fileset.NewGlobSet(root, []string{}) require.NoError(t, err) s := &Sync{ @@ -97,10 +99,10 @@ func TestGetFileSet(t *testing.T) { require.NoError(t, err) require.Equal(t, len(fileList), 9) - inc, err = fileset.NewGlobSet(dir, []string{}) + inc, err = fileset.NewGlobSet(root, []string{}) require.NoError(t, err) - excl, err = fileset.NewGlobSet(dir, []string{"*.go"}) + excl, err = fileset.NewGlobSet(root, []string{"*.go"}) require.NoError(t, err) s = &Sync{ @@ -115,10 +117,10 @@ func TestGetFileSet(t *testing.T) { require.NoError(t, err) require.Equal(t, len(fileList), 1) - inc, err = fileset.NewGlobSet(dir, []string{".databricks/*"}) + inc, err = fileset.NewGlobSet(root, []string{".databricks/*"}) require.NoError(t, err) - excl, err = fileset.NewGlobSet(dir, []string{}) + excl, err = fileset.NewGlobSet(root, []string{}) require.NoError(t, err) s = &Sync{ @@ -138,16 +140,17 @@ func TestRecursiveExclude(t *testing.T) { ctx := context.Background() dir := setupFiles(t) - fileSet, err := git.NewFileSet(dir) + root := vfs.MustNew(dir) + fileSet, err := git.NewFileSet(root) require.NoError(t, err) err = fileSet.EnsureValidGitIgnoreExists() require.NoError(t, err) - inc, err := fileset.NewGlobSet(dir, []string{}) + inc, err := fileset.NewGlobSet(root, []string{}) require.NoError(t, err) - excl, err := fileset.NewGlobSet(dir, []string{"test/**"}) + excl, err := fileset.NewGlobSet(root, []string{"test/**"}) require.NoError(t, err) s := &Sync{ diff --git a/libs/sync/watchdog.go b/libs/sync/watchdog.go index b0c96e01..ca7ec46e 100644 --- a/libs/sync/watchdog.go +++ b/libs/sync/watchdog.go @@ -4,8 +4,6 @@ import ( "context" "errors" "io/fs" - "os" - "path/filepath" "github.com/databricks/cli/libs/filer" "github.com/databricks/cli/libs/log" @@ -59,7 +57,7 @@ func (s *Sync) applyMkdir(ctx context.Context, localName string) error { func (s *Sync) applyPut(ctx context.Context, localName string) error { s.notifyProgress(ctx, EventActionPut, localName, 0.0) - localFile, err := os.Open(filepath.Join(s.LocalPath, localName)) + localFile, err := s.LocalPath.Open(localName) if err != nil { return err } diff --git a/libs/vfs/leaf.go b/libs/vfs/leaf.go new file mode 100644 index 00000000..8c11f903 --- /dev/null +++ b/libs/vfs/leaf.go @@ -0,0 +1,29 @@ +package vfs + +import ( + "errors" + "io/fs" +) + +// FindLeafInTree returns the first path that holds `name`, +// traversing up to the root of the filesystem, starting at `p`. +func FindLeafInTree(p Path, name string) (Path, error) { + for p != nil { + _, err := fs.Stat(p, name) + + // No error means we found the leaf in p. + if err == nil { + return p, nil + } + + // ErrNotExist means we continue traversal up the tree. + if errors.Is(err, fs.ErrNotExist) { + p = p.Parent() + continue + } + + return nil, err + } + + return nil, fs.ErrNotExist +} diff --git a/libs/vfs/leaf_test.go b/libs/vfs/leaf_test.go new file mode 100644 index 00000000..da9412ec --- /dev/null +++ b/libs/vfs/leaf_test.go @@ -0,0 +1,38 @@ +package vfs + +import ( + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestFindLeafInTree(t *testing.T) { + wd, err := os.Getwd() + require.NoError(t, err) + + root := filepath.Join(wd, "..", "..") + + // Find from working directory should work. + { + out, err := FindLeafInTree(MustNew(wd), ".git") + assert.NoError(t, err) + assert.Equal(t, root, out.Native()) + } + + // Find from project root itself should work. + { + out, err := FindLeafInTree(MustNew(root), ".git") + assert.NoError(t, err) + assert.Equal(t, root, out.Native()) + } + + // Find for something that doesn't exist should work. + { + out, err := FindLeafInTree(MustNew(root), "this-leaf-doesnt-exist-anywhere") + assert.ErrorIs(t, err, os.ErrNotExist) + assert.Equal(t, nil, out) + } +} diff --git a/libs/vfs/os.go b/libs/vfs/os.go new file mode 100644 index 00000000..26447d83 --- /dev/null +++ b/libs/vfs/os.go @@ -0,0 +1,82 @@ +package vfs + +import ( + "io/fs" + "os" + "path/filepath" +) + +type osPath struct { + path string + + openFn func(name string) (fs.File, error) + statFn func(name string) (fs.FileInfo, error) + readDirFn func(name string) ([]fs.DirEntry, error) + readFileFn func(name string) ([]byte, error) +} + +func New(name string) (Path, error) { + abs, err := filepath.Abs(name) + if err != nil { + return nil, err + } + + return newOsPath(abs), nil +} + +func MustNew(name string) Path { + p, err := New(name) + if err != nil { + panic(err) + } + + return p +} + +func newOsPath(name string) Path { + if !filepath.IsAbs(name) { + panic("vfs: abs path must be absolute") + } + + // [os.DirFS] implements all required interfaces. + // We used type assertion below to get the underlying types. + dirfs := os.DirFS(name) + + return &osPath{ + path: name, + + openFn: dirfs.Open, + statFn: dirfs.(fs.StatFS).Stat, + readDirFn: dirfs.(fs.ReadDirFS).ReadDir, + readFileFn: dirfs.(fs.ReadFileFS).ReadFile, + } +} + +func (o osPath) Open(name string) (fs.File, error) { + return o.openFn(name) +} + +func (o osPath) Stat(name string) (fs.FileInfo, error) { + return o.statFn(name) +} + +func (o osPath) ReadDir(name string) ([]fs.DirEntry, error) { + return o.readDirFn(name) +} + +func (o osPath) ReadFile(name string) ([]byte, error) { + return o.readFileFn(name) +} + +func (o osPath) Parent() Path { + dir := filepath.Dir(o.path) + if dir == o.path { + return nil + } + + return newOsPath(dir) +} + +func (o osPath) Native() string { + return o.path +} diff --git a/libs/vfs/os_test.go b/libs/vfs/os_test.go new file mode 100644 index 00000000..6199bdc7 --- /dev/null +++ b/libs/vfs/os_test.go @@ -0,0 +1,54 @@ +package vfs + +import ( + "os" + "path/filepath" + "runtime" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestOsNewWithRelativePath(t *testing.T) { + wd, err := os.Getwd() + require.NoError(t, err) + + p, err := New(".") + require.NoError(t, err) + require.Equal(t, wd, p.Native()) +} + +func TestOsPathParent(t *testing.T) { + wd, err := os.Getwd() + require.NoError(t, err) + + p := MustNew(wd) + require.NotNil(t, p) + + // Traverse all the way to the root. + for { + q := p.Parent() + if q == nil { + // Parent returns nil when it is the root. + break + } + + p = q + } + + // We should have reached the root. + if runtime.GOOS == "windows" { + require.Equal(t, filepath.VolumeName(wd)+`\`, p.Native()) + } else { + require.Equal(t, "/", p.Native()) + } +} + +func TestOsPathNative(t *testing.T) { + wd, err := os.Getwd() + require.NoError(t, err) + + p := MustNew(wd) + require.NotNil(t, p) + require.Equal(t, wd, p.Native()) +} diff --git a/libs/vfs/path.go b/libs/vfs/path.go new file mode 100644 index 00000000..19f119d5 --- /dev/null +++ b/libs/vfs/path.go @@ -0,0 +1,29 @@ +package vfs + +import "io/fs" + +// FS combines the fs.FS, fs.StatFS, fs.ReadDirFS, and fs.ReadFileFS interfaces. +// It mandates that Path implementations must support all these interfaces. +type FS interface { + fs.FS + fs.StatFS + fs.ReadDirFS + fs.ReadFileFS +} + +// Path defines a read-only virtual file system interface for: +// +// 1. Intercepting file operations to inject custom logic (e.g., logging, access control). +// 2. Traversing directories to find specific leaf directories (e.g., .git). +// 3. Converting virtual paths to OS-native paths. +// +// Options 2 and 3 are not possible with the standard fs.FS interface. +// They are needed such that we can provide an instance to the sync package +// and still detect the containing .git directory and convert paths to native paths. +type Path interface { + FS + + Parent() Path + + Native() string +} diff --git a/libs/vfs/path_test.go b/libs/vfs/path_test.go new file mode 100644 index 00000000..54c60940 --- /dev/null +++ b/libs/vfs/path_test.go @@ -0,0 +1 @@ +package vfs