Abstract over filesystem interaction with libs/vfs (#1452)

## Changes

Introduce `libs/vfs` for an implementation of `fs.FS` and friends that
_includes_ the absolute path it is anchored to.

This is needed for:
1. Intercepting file operations to inject custom logic (e.g., logging,
access control).
2. Traversing directories to find specific leaf directories (e.g.,
`.git`).
3. Converting virtual paths to OS-native paths.

Options 2 and 3 are not possible with the standard `fs.FS` interface.
They are needed such that we can provide an instance to the sync package
and still detect the containing `.git` directory and convert paths to
native paths.

This change focuses on making the following packages use `vfs.Path`:
* libs/fileset
* libs/git
* libs/sync

All entries returned by `fileset.All` are now slash-separated. This has
2 consequences:
* The sync snapshot now always uses slash-separated paths
* We don't need to call `filepath.FromSlash` as much as we did

## Tests

* All unit tests pass
* All integration tests pass
* Manually confirmed that a deployment made on Windows by a previous
version of the CLI can be deployed by a new version of the CLI while
retaining the validity of the local sync snapshot as well as the remote
deployment state.
This commit is contained in:
Pieter Noordhuis 2024-05-30 09:41:50 +02:00 committed by GitHub
parent b2ea9dd971
commit 424499ec1d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
42 changed files with 603 additions and 285 deletions

View File

@ -22,6 +22,7 @@ import (
"github.com/databricks/cli/libs/log"
"github.com/databricks/cli/libs/tags"
"github.com/databricks/cli/libs/terraform"
"github.com/databricks/cli/libs/vfs"
"github.com/databricks/databricks-sdk-go"
sdkconfig "github.com/databricks/databricks-sdk-go/config"
"github.com/hashicorp/terraform-exec/tfexec"
@ -208,7 +209,7 @@ func (b *Bundle) GitRepository() (*git.Repository, error) {
return nil, fmt.Errorf("unable to locate repository root: %w", err)
}
return git.NewRepository(rootPath)
return git.NewRepository(vfs.MustNew(rootPath))
}
// AuthEnv returns a map with environment variables and their values

View File

@ -8,6 +8,7 @@ import (
"github.com/databricks/cli/libs/diag"
"github.com/databricks/cli/libs/git"
"github.com/databricks/cli/libs/log"
"github.com/databricks/cli/libs/vfs"
)
type loadGitDetails struct{}
@ -22,7 +23,7 @@ func (m *loadGitDetails) Name() string {
func (m *loadGitDetails) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
// Load relevant git repository
repo, err := git.NewRepository(b.RootPath)
repo, err := git.NewRepository(vfs.MustNew(b.RootPath))
if err != nil {
return diag.FromErr(err)
}

View File

@ -8,6 +8,7 @@ import (
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/libs/diag"
"github.com/databricks/cli/libs/fileset"
"github.com/databricks/cli/libs/vfs"
"golang.org/x/sync/errgroup"
)
@ -50,7 +51,7 @@ func checkPatterns(patterns []string, path string, rb bundle.ReadOnlyBundle) (di
index := i
p := pattern
errs.Go(func() error {
fs, err := fileset.NewGlobSet(rb.RootPath(), []string{p})
fs, err := fileset.NewGlobSet(vfs.MustNew(rb.RootPath()), []string{p})
if err != nil {
return err
}

View File

@ -6,6 +6,7 @@ import (
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/libs/sync"
"github.com/databricks/cli/libs/vfs"
)
func GetSync(ctx context.Context, rb bundle.ReadOnlyBundle) (*sync.Sync, error) {
@ -28,7 +29,7 @@ func GetSyncOptions(ctx context.Context, rb bundle.ReadOnlyBundle) (*sync.SyncOp
}
opts := &sync.SyncOptions{
LocalPath: rb.RootPath(),
LocalPath: vfs.MustNew(rb.RootPath()),
RemotePath: rb.Config().Workspace.FilePath,
Include: includes,
Exclude: rb.Config().Sync.Exclude,

View File

@ -12,6 +12,7 @@ import (
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/libs/fileset"
"github.com/databricks/cli/libs/vfs"
)
const DeploymentStateFileName = "deployment.json"
@ -112,12 +113,18 @@ func FromSlice(files []fileset.File) (Filelist, error) {
func (f Filelist) ToSlice(basePath string) []fileset.File {
var files []fileset.File
root := vfs.MustNew(basePath)
for _, file := range f {
absPath := filepath.Join(basePath, file.LocalPath)
entry := newEntry(filepath.Join(basePath, file.LocalPath))
// Snapshots created with versions <= v0.220.0 use platform-specific
// paths (i.e. with backslashes). Files returned by [libs/fileset] always
// contain forward slashes after this version. Normalize before using.
relative := filepath.ToSlash(file.LocalPath)
if file.IsNotebook {
files = append(files, fileset.NewNotebookFile(newEntry(absPath), absPath, file.LocalPath))
files = append(files, fileset.NewNotebookFile(root, entry, relative))
} else {
files = append(files, fileset.NewSourceFile(newEntry(absPath), absPath, file.LocalPath))
files = append(files, fileset.NewSourceFile(root, entry, relative))
}
}
return files

View File

@ -3,17 +3,17 @@ package deploy
import (
"bytes"
"encoding/json"
"path/filepath"
"testing"
"github.com/databricks/cli/internal/testutil"
"github.com/databricks/cli/libs/fileset"
"github.com/databricks/cli/libs/vfs"
"github.com/stretchr/testify/require"
)
func TestFromSlice(t *testing.T) {
tmpDir := t.TempDir()
fileset := fileset.New(tmpDir)
fileset := fileset.New(vfs.MustNew(tmpDir))
testutil.Touch(t, tmpDir, "test1.py")
testutil.Touch(t, tmpDir, "test2.py")
testutil.Touch(t, tmpDir, "test3.py")
@ -32,7 +32,7 @@ func TestFromSlice(t *testing.T) {
func TestToSlice(t *testing.T) {
tmpDir := t.TempDir()
fileset := fileset.New(tmpDir)
fileset := fileset.New(vfs.MustNew(tmpDir))
testutil.Touch(t, tmpDir, "test1.py")
testutil.Touch(t, tmpDir, "test2.py")
testutil.Touch(t, tmpDir, "test3.py")
@ -48,18 +48,11 @@ func TestToSlice(t *testing.T) {
require.Len(t, s, 3)
for _, file := range s {
require.Contains(t, []string{"test1.py", "test2.py", "test3.py"}, file.Name())
require.Contains(t, []string{
filepath.Join(tmpDir, "test1.py"),
filepath.Join(tmpDir, "test2.py"),
filepath.Join(tmpDir, "test3.py"),
}, file.Absolute)
require.False(t, file.IsDir())
require.NotZero(t, file.Type())
info, err := file.Info()
require.NoError(t, err)
require.NotNil(t, info)
require.Equal(t, file.Name(), info.Name())
require.Contains(t, []string{"test1.py", "test2.py", "test3.py"}, file.Relative)
// If the mtime is not zero we know we produced a valid fs.DirEntry.
ts := file.Modified()
require.NotZero(t, ts)
}
}

View File

@ -14,6 +14,7 @@ import (
"github.com/databricks/cli/cmd/root"
"github.com/databricks/cli/libs/flags"
"github.com/databricks/cli/libs/sync"
"github.com/databricks/cli/libs/vfs"
"github.com/spf13/cobra"
)
@ -46,7 +47,7 @@ func (f *syncFlags) syncOptionsFromArgs(cmd *cobra.Command, args []string) (*syn
}
opts := sync.SyncOptions{
LocalPath: args[0],
LocalPath: vfs.MustNew(args[0]),
RemotePath: args[1],
Full: f.full,
PollInterval: f.interval,

View File

@ -31,7 +31,7 @@ func TestSyncOptionsFromBundle(t *testing.T) {
f := syncFlags{}
opts, err := f.syncOptionsFromBundle(New(), []string{}, b)
require.NoError(t, err)
assert.Equal(t, tempDir, opts.LocalPath)
assert.Equal(t, tempDir, opts.LocalPath.Native())
assert.Equal(t, "/Users/jane@doe.com/path", opts.RemotePath)
assert.Equal(t, filepath.Join(tempDir, ".databricks", "bundle", "default"), opts.SnapshotBasePath)
assert.NotNil(t, opts.WorkspaceClient)
@ -49,11 +49,14 @@ func TestSyncOptionsFromArgsRequiredTwoArgs(t *testing.T) {
}
func TestSyncOptionsFromArgs(t *testing.T) {
local := t.TempDir()
remote := "/remote"
f := syncFlags{}
cmd := New()
cmd.SetContext(root.SetWorkspaceClient(context.Background(), nil))
opts, err := f.syncOptionsFromArgs(cmd, []string{"/local", "/remote"})
opts, err := f.syncOptionsFromArgs(cmd, []string{local, remote})
require.NoError(t, err)
assert.Equal(t, "/local", opts.LocalPath)
assert.Equal(t, "/remote", opts.RemotePath)
assert.Equal(t, local, opts.LocalPath.Native())
assert.Equal(t, remote, opts.RemotePath)
}

View File

@ -313,7 +313,7 @@ func TestAccSyncNestedFolderSync(t *testing.T) {
assertSync.remoteDirContent(ctx, "dir1", []string{"dir2"})
assertSync.remoteDirContent(ctx, "dir1/dir2", []string{"dir3"})
assertSync.remoteDirContent(ctx, "dir1/dir2/dir3", []string{"foo.txt"})
assertSync.snapshotContains(append(repoFiles, ".gitignore", filepath.FromSlash("dir1/dir2/dir3/foo.txt")))
assertSync.snapshotContains(append(repoFiles, ".gitignore", "dir1/dir2/dir3/foo.txt"))
// delete
f.Remove(t)
@ -374,7 +374,7 @@ func TestAccSyncNestedSpacePlusAndHashAreEscapedSync(t *testing.T) {
assertSync.remoteDirContent(ctx, "dir1", []string{"a b+c"})
assertSync.remoteDirContent(ctx, "dir1/a b+c", []string{"c+d e"})
assertSync.remoteDirContent(ctx, "dir1/a b+c/c+d e", []string{"e+f g#i.txt"})
assertSync.snapshotContains(append(repoFiles, ".gitignore", filepath.FromSlash("dir1/a b+c/c+d e/e+f g#i.txt")))
assertSync.snapshotContains(append(repoFiles, ".gitignore", "dir1/a b+c/c+d e/e+f g#i.txt"))
// delete
f.Remove(t)
@ -404,7 +404,7 @@ func TestAccSyncIncrementalFileOverwritesFolder(t *testing.T) {
assertSync.waitForCompletionMarker()
assertSync.remoteDirContent(ctx, "", append(repoFiles, ".gitignore", "foo"))
assertSync.remoteDirContent(ctx, "foo", []string{"bar.txt"})
assertSync.snapshotContains(append(repoFiles, ".gitignore", filepath.FromSlash("foo/bar.txt")))
assertSync.snapshotContains(append(repoFiles, ".gitignore", "foo/bar.txt"))
// delete foo/bar.txt
f.Remove(t)

View File

@ -5,6 +5,7 @@ import (
"time"
"github.com/databricks/cli/libs/notebook"
"github.com/databricks/cli/libs/vfs"
)
type fileType int
@ -16,40 +17,49 @@ const (
)
type File struct {
fs.DirEntry
Absolute, Relative string
// Root path of the fileset.
root vfs.Path
// File entry as returned by the [fs.WalkDir] function.
entry fs.DirEntry
// Type of the file.
fileType fileType
// Relative path within the fileset.
// Combine with the [vfs.Path] to interact with the underlying file.
Relative string
}
func NewNotebookFile(entry fs.DirEntry, absolute string, relative string) File {
func NewNotebookFile(root vfs.Path, entry fs.DirEntry, relative string) File {
return File{
DirEntry: entry,
Absolute: absolute,
Relative: relative,
root: root,
entry: entry,
fileType: Notebook,
Relative: relative,
}
}
func NewFile(entry fs.DirEntry, absolute string, relative string) File {
func NewFile(root vfs.Path, entry fs.DirEntry, relative string) File {
return File{
DirEntry: entry,
Absolute: absolute,
Relative: relative,
root: root,
entry: entry,
fileType: Unknown,
Relative: relative,
}
}
func NewSourceFile(entry fs.DirEntry, absolute string, relative string) File {
func NewSourceFile(root vfs.Path, entry fs.DirEntry, relative string) File {
return File{
DirEntry: entry,
Absolute: absolute,
Relative: relative,
root: root,
entry: entry,
fileType: Source,
Relative: relative,
}
}
func (f File) Modified() (ts time.Time) {
info, err := f.Info()
info, err := f.entry.Info()
if err != nil {
// return default time, beginning of epoch
return ts
@ -63,7 +73,7 @@ func (f *File) IsNotebook() (bool, error) {
}
// Otherwise, detect the notebook type.
isNotebook, _, err := notebook.Detect(f.Absolute)
isNotebook, _, err := notebook.DetectWithFS(f.root, f.Relative)
if err != nil {
return false, err
}

View File

@ -1,22 +1,22 @@
package fileset
import (
"path/filepath"
"testing"
"github.com/databricks/cli/internal/testutil"
"github.com/databricks/cli/libs/vfs"
"github.com/stretchr/testify/require"
)
func TestNotebookFileIsNotebook(t *testing.T) {
f := NewNotebookFile(nil, "", "")
f := NewNotebookFile(nil, nil, "")
isNotebook, err := f.IsNotebook()
require.NoError(t, err)
require.True(t, isNotebook)
}
func TestSourceFileIsNotNotebook(t *testing.T) {
f := NewSourceFile(nil, "", "")
f := NewSourceFile(nil, nil, "")
isNotebook, err := f.IsNotebook()
require.NoError(t, err)
require.False(t, isNotebook)
@ -24,18 +24,19 @@ func TestSourceFileIsNotNotebook(t *testing.T) {
func TestUnknownFileDetectsNotebook(t *testing.T) {
tmpDir := t.TempDir()
root := vfs.MustNew(tmpDir)
t.Run("file", func(t *testing.T) {
path := testutil.Touch(t, tmpDir, "test.py")
f := NewFile(nil, path, filepath.Base(path))
testutil.Touch(t, tmpDir, "test.py")
f := NewFile(root, nil, "test.py")
isNotebook, err := f.IsNotebook()
require.NoError(t, err)
require.False(t, isNotebook)
})
t.Run("notebook", func(t *testing.T) {
path := testutil.TouchNotebook(t, tmpDir, "notebook.py")
f := NewFile(nil, path, filepath.Base(path))
testutil.TouchNotebook(t, tmpDir, "notebook.py")
f := NewFile(root, nil, "notebook.py")
isNotebook, err := f.IsNotebook()
require.NoError(t, err)
require.True(t, isNotebook)

View File

@ -4,20 +4,24 @@ import (
"fmt"
"io/fs"
"os"
"path/filepath"
"github.com/databricks/cli/libs/vfs"
)
// FileSet facilitates fast recursive file listing of a path.
// It optionally takes into account ignore rules through the [Ignorer] interface.
type FileSet struct {
root string
// Root path of the fileset.
root vfs.Path
// Ignorer interface to check if a file or directory should be ignored.
ignore Ignorer
}
// New returns a [FileSet] for the given root path.
func New(root string) *FileSet {
func New(root vfs.Path) *FileSet {
return &FileSet{
root: filepath.Clean(root),
root: root,
ignore: nopIgnorer{},
}
}
@ -32,11 +36,6 @@ func (w *FileSet) SetIgnorer(ignore Ignorer) {
w.ignore = ignore
}
// Return root for fileset.
func (w *FileSet) Root() string {
return w.root
}
// Return all tracked files for Repo
func (w *FileSet) All() ([]File, error) {
return w.recursiveListFiles()
@ -46,12 +45,7 @@ func (w *FileSet) All() ([]File, error) {
// that are being tracked in the FileSet (ie not being ignored for matching one of the
// patterns in w.ignore)
func (w *FileSet) recursiveListFiles() (fileList []File, err error) {
err = filepath.WalkDir(w.root, func(path string, d fs.DirEntry, err error) error {
if err != nil {
return err
}
relPath, err := filepath.Rel(w.root, path)
err = fs.WalkDir(w.root, ".", func(name string, d fs.DirEntry, err error) error {
if err != nil {
return err
}
@ -66,25 +60,25 @@ func (w *FileSet) recursiveListFiles() (fileList []File, err error) {
}
if d.IsDir() {
ign, err := w.ignore.IgnoreDirectory(relPath)
ign, err := w.ignore.IgnoreDirectory(name)
if err != nil {
return fmt.Errorf("cannot check if %s should be ignored: %w", relPath, err)
return fmt.Errorf("cannot check if %s should be ignored: %w", name, err)
}
if ign {
return filepath.SkipDir
return fs.SkipDir
}
return nil
}
ign, err := w.ignore.IgnoreFile(relPath)
ign, err := w.ignore.IgnoreFile(name)
if err != nil {
return fmt.Errorf("cannot check if %s should be ignored: %w", relPath, err)
return fmt.Errorf("cannot check if %s should be ignored: %w", name, err)
}
if ign {
return nil
}
fileList = append(fileList, NewFile(d, path, relPath))
fileList = append(fileList, NewFile(w.root, d, name))
return nil
})
return

View File

@ -1,22 +1,17 @@
package fileset
import (
"path/filepath"
"path"
"github.com/databricks/cli/libs/vfs"
)
func NewGlobSet(root string, includes []string) (*FileSet, error) {
absRoot, err := filepath.Abs(root)
if err != nil {
return nil, err
}
func NewGlobSet(root vfs.Path, includes []string) (*FileSet, error) {
for k := range includes {
includes[k] = filepath.ToSlash(filepath.Clean(includes[k]))
includes[k] = path.Clean(includes[k])
}
fs := &FileSet{
absRoot,
newIncluder(includes),
}
fs := New(root)
fs.SetIgnorer(newIncluder(includes))
return fs, nil
}

View File

@ -2,21 +2,26 @@ package fileset
import (
"io/fs"
"os"
"path/filepath"
"path"
"slices"
"strings"
"testing"
"github.com/databricks/cli/libs/vfs"
"github.com/stretchr/testify/require"
)
func TestGlobFileset(t *testing.T) {
cwd, err := os.Getwd()
require.NoError(t, err)
root := filepath.Join(cwd, "..", "filer")
func collectRelativePaths(files []File) []string {
relativePaths := make([]string, 0)
for _, f := range files {
relativePaths = append(relativePaths, f.Relative)
}
return relativePaths
}
entries, err := os.ReadDir(root)
func TestGlobFileset(t *testing.T) {
root := vfs.MustNew("../filer")
entries, err := root.ReadDir(".")
require.NoError(t, err)
g, err := NewGlobSet(root, []string{
@ -30,7 +35,7 @@ func TestGlobFileset(t *testing.T) {
require.Equal(t, len(files), len(entries))
for _, f := range files {
exists := slices.ContainsFunc(entries, func(de fs.DirEntry) bool {
return de.Name() == f.Name()
return de.Name() == path.Base(f.Relative)
})
require.True(t, exists)
}
@ -46,9 +51,8 @@ func TestGlobFileset(t *testing.T) {
}
func TestGlobFilesetWithRelativeRoot(t *testing.T) {
root := filepath.Join("..", "filer")
entries, err := os.ReadDir(root)
root := vfs.MustNew("../filer")
entries, err := root.ReadDir(".")
require.NoError(t, err)
g, err := NewGlobSet(root, []string{
@ -58,21 +62,14 @@ func TestGlobFilesetWithRelativeRoot(t *testing.T) {
files, err := g.All()
require.NoError(t, err)
require.Equal(t, len(files), len(entries))
for _, f := range files {
require.True(t, filepath.IsAbs(f.Absolute))
}
}
func TestGlobFilesetRecursively(t *testing.T) {
cwd, err := os.Getwd()
require.NoError(t, err)
root := filepath.Join(cwd, "..", "git")
root := vfs.MustNew("../git")
entries := make([]string, 0)
err = filepath.Walk(filepath.Join(root, "testdata"), func(path string, info fs.FileInfo, err error) error {
if !info.IsDir() {
err := fs.WalkDir(root, "testdata", func(path string, d fs.DirEntry, err error) error {
if !d.IsDir() {
entries = append(entries, path)
}
return nil
@ -86,24 +83,14 @@ func TestGlobFilesetRecursively(t *testing.T) {
files, err := g.All()
require.NoError(t, err)
require.Equal(t, len(files), len(entries))
for _, f := range files {
exists := slices.ContainsFunc(entries, func(path string) bool {
return path == f.Absolute
})
require.True(t, exists)
}
require.ElementsMatch(t, entries, collectRelativePaths(files))
}
func TestGlobFilesetDir(t *testing.T) {
cwd, err := os.Getwd()
require.NoError(t, err)
root := filepath.Join(cwd, "..", "git")
root := vfs.MustNew("../git")
entries := make([]string, 0)
err = filepath.Walk(filepath.Join(root, "testdata", "a"), func(path string, info fs.FileInfo, err error) error {
if !info.IsDir() {
err := fs.WalkDir(root, "testdata/a", func(path string, d fs.DirEntry, err error) error {
if !d.IsDir() {
entries = append(entries, path)
}
return nil
@ -117,23 +104,13 @@ func TestGlobFilesetDir(t *testing.T) {
files, err := g.All()
require.NoError(t, err)
require.Equal(t, len(files), len(entries))
for _, f := range files {
exists := slices.ContainsFunc(entries, func(path string) bool {
return path == f.Absolute
})
require.True(t, exists)
}
require.ElementsMatch(t, entries, collectRelativePaths(files))
}
func TestGlobFilesetDoubleQuotesWithFilePatterns(t *testing.T) {
cwd, err := os.Getwd()
require.NoError(t, err)
root := filepath.Join(cwd, "..", "git")
root := vfs.MustNew("../git")
entries := make([]string, 0)
err = filepath.Walk(filepath.Join(root, "testdata"), func(path string, info fs.FileInfo, err error) error {
err := fs.WalkDir(root, "testdata", func(path string, d fs.DirEntry, err error) error {
if strings.HasSuffix(path, ".txt") {
entries = append(entries, path)
}
@ -148,12 +125,5 @@ func TestGlobFilesetDoubleQuotesWithFilePatterns(t *testing.T) {
files, err := g.All()
require.NoError(t, err)
require.Equal(t, len(files), len(entries))
for _, f := range files {
exists := slices.ContainsFunc(entries, func(path string) bool {
return path == f.Absolute
})
require.True(t, exists)
}
require.ElementsMatch(t, entries, collectRelativePaths(files))
}

View File

@ -8,6 +8,7 @@ import (
"regexp"
"strings"
"github.com/databricks/cli/libs/vfs"
"gopkg.in/ini.v1"
)
@ -87,8 +88,8 @@ func (c config) load(r io.Reader) error {
return nil
}
func (c config) loadFile(path string) error {
f, err := os.Open(path)
func (c config) loadFile(fs vfs.Path, path string) error {
f, err := fs.Open(path)
if err != nil {
// If the file doesn't exist it is ignored.
// This is the case for both global and repository specific config files.
@ -152,8 +153,8 @@ func globalGitConfig() (*config, error) {
// > are missing or unreadable they will be ignored.
//
// We therefore ignore the error return value for the calls below.
config.loadFile(filepath.Join(xdgConfigHome, "git/config"))
config.loadFile(filepath.Join(config.home, ".gitconfig"))
config.loadFile(vfs.MustNew(xdgConfigHome), "git/config")
config.loadFile(vfs.MustNew(config.home), ".gitconfig")
return config, nil
}

View File

@ -2,6 +2,7 @@ package git
import (
"github.com/databricks/cli/libs/fileset"
"github.com/databricks/cli/libs/vfs"
)
// FileSet is Git repository aware implementation of [fileset.FileSet].
@ -13,7 +14,7 @@ type FileSet struct {
}
// NewFileSet returns [FileSet] for the Git repository located at `root`.
func NewFileSet(root string) (*FileSet, error) {
func NewFileSet(root vfs.Path) (*FileSet, error) {
fs := fileset.New(root)
v, err := NewView(root)
if err != nil {
@ -34,10 +35,6 @@ func (f *FileSet) IgnoreDirectory(dir string) (bool, error) {
return f.view.IgnoreDirectory(dir)
}
func (f *FileSet) Root() string {
return f.fileset.Root()
}
func (f *FileSet) All() ([]fileset.File, error) {
f.view.repo.taintIgnoreRules()
return f.fileset.All()

View File

@ -2,23 +2,25 @@ package git
import (
"os"
"path"
"path/filepath"
"strings"
"testing"
"github.com/databricks/cli/libs/vfs"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func testFileSetAll(t *testing.T, path string) {
fileSet, err := NewFileSet(path)
func testFileSetAll(t *testing.T, root string) {
fileSet, err := NewFileSet(vfs.MustNew(root))
require.NoError(t, err)
files, err := fileSet.All()
require.NoError(t, err)
require.Len(t, files, 3)
assert.Equal(t, filepath.Join("a", "b", "world.txt"), files[0].Relative)
assert.Equal(t, filepath.Join("a", "hello.txt"), files[1].Relative)
assert.Equal(t, filepath.Join("databricks.yml"), files[2].Relative)
assert.Equal(t, path.Join("a", "b", "world.txt"), files[0].Relative)
assert.Equal(t, path.Join("a", "hello.txt"), files[1].Relative)
assert.Equal(t, path.Join("databricks.yml"), files[2].Relative)
}
func TestFileSetListAllInRepo(t *testing.T) {
@ -33,7 +35,7 @@ func TestFileSetNonCleanRoot(t *testing.T) {
// Test what happens if the root directory can be simplified.
// Path simplification is done by most filepath functions.
// This should yield the same result as above test.
fileSet, err := NewFileSet("./testdata/../testdata")
fileSet, err := NewFileSet(vfs.MustNew("./testdata/../testdata"))
require.NoError(t, err)
files, err := fileSet.All()
require.NoError(t, err)
@ -42,7 +44,7 @@ func TestFileSetNonCleanRoot(t *testing.T) {
func TestFileSetAddsCacheDirToGitIgnore(t *testing.T) {
projectDir := t.TempDir()
fileSet, err := NewFileSet(projectDir)
fileSet, err := NewFileSet(vfs.MustNew(projectDir))
require.NoError(t, err)
fileSet.EnsureValidGitIgnoreExists()
@ -57,7 +59,7 @@ func TestFileSetDoesNotCacheDirToGitIgnoreIfAlreadyPresent(t *testing.T) {
projectDir := t.TempDir()
gitIgnorePath := filepath.Join(projectDir, ".gitignore")
fileSet, err := NewFileSet(projectDir)
fileSet, err := NewFileSet(vfs.MustNew(projectDir))
require.NoError(t, err)
err = os.WriteFile(gitIgnorePath, []byte(".databricks"), 0o644)
require.NoError(t, err)

View File

@ -1,9 +1,12 @@
package git
import (
"io/fs"
"os"
"strings"
"time"
"github.com/databricks/cli/libs/vfs"
ignore "github.com/sabhiram/go-gitignore"
)
@ -21,7 +24,8 @@ type ignoreRules interface {
// ignoreFile represents a gitignore file backed by a path.
// If the path doesn't exist (yet), it is treated as an empty file.
type ignoreFile struct {
absPath string
root vfs.Path
path string
// Signal a reload of this file.
// Set this to call [os.Stat] and a potential reload
@ -35,9 +39,10 @@ type ignoreFile struct {
patterns *ignore.GitIgnore
}
func newIgnoreFile(absPath string) ignoreRules {
func newIgnoreFile(root vfs.Path, path string) ignoreRules {
return &ignoreFile{
absPath: absPath,
root: root,
path: path,
checkForReload: true,
}
}
@ -67,7 +72,7 @@ func (f *ignoreFile) Taint() {
func (f *ignoreFile) load() error {
// The file must be stat-able.
// If it doesn't exist, treat it as an empty file.
stat, err := os.Stat(f.absPath)
stat, err := fs.Stat(f.root, f.path)
if err != nil {
if os.IsNotExist(err) {
return nil
@ -82,7 +87,7 @@ func (f *ignoreFile) load() error {
}
f.modTime = stat.ModTime()
f.patterns, err = ignore.CompileIgnoreFile(f.absPath)
f.patterns, err = f.loadGitignore()
if err != nil {
return err
}
@ -90,6 +95,16 @@ func (f *ignoreFile) load() error {
return nil
}
func (f *ignoreFile) loadGitignore() (*ignore.GitIgnore, error) {
data, err := fs.ReadFile(f.root, f.path)
if err != nil {
return nil, err
}
lines := strings.Split(string(data), "\n")
return ignore.CompileIgnoreLines(lines...), nil
}
// stringIgnoreRules implements the [ignoreRules] interface
// for a set of in-memory ignore patterns.
type stringIgnoreRules struct {

View File

@ -5,6 +5,7 @@ import (
"path/filepath"
"testing"
"github.com/databricks/cli/libs/vfs"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
@ -13,7 +14,7 @@ func TestIgnoreFile(t *testing.T) {
var ign bool
var err error
f := newIgnoreFile("./testdata/.gitignore")
f := newIgnoreFile(vfs.MustNew("testdata"), ".gitignore")
ign, err = f.MatchesPath("root.foo")
require.NoError(t, err)
assert.True(t, ign)
@ -27,7 +28,7 @@ func TestIgnoreFileDoesntExist(t *testing.T) {
var err error
// Files that don't exist are treated as an empty gitignore file.
f := newIgnoreFile("./testdata/thispathdoesntexist")
f := newIgnoreFile(vfs.MustNew("testdata"), "thispathdoesntexist")
ign, err = f.MatchesPath("i'm included")
require.NoError(t, err)
assert.False(t, ign)
@ -41,7 +42,7 @@ func TestIgnoreFileTaint(t *testing.T) {
gitIgnorePath := filepath.Join(tempDir, ".gitignore")
// Files that don't exist are treated as an empty gitignore file.
f := newIgnoreFile(gitIgnorePath)
f := newIgnoreFile(vfs.MustNew(tempDir), ".gitignore")
ign, err = f.MatchesPath("hello")
require.NoError(t, err)
assert.False(t, ign)

View File

@ -2,10 +2,12 @@ package git
import (
"fmt"
"io/fs"
"os"
"path/filepath"
"regexp"
"strings"
"github.com/databricks/cli/libs/vfs"
)
type ReferenceType string
@ -37,9 +39,9 @@ func isSHA1(s string) bool {
return re.MatchString(s)
}
func LoadReferenceFile(path string) (*Reference, error) {
func LoadReferenceFile(root vfs.Path, path string) (*Reference, error) {
// read reference file content
b, err := os.ReadFile(path)
b, err := fs.ReadFile(root, path)
if os.IsNotExist(err) {
return nil, nil
}
@ -73,8 +75,7 @@ func (ref *Reference) ResolvePath() (string, error) {
if ref.Type != ReferenceTypePointer {
return "", ErrNotAReferencePointer
}
refPath := strings.TrimPrefix(ref.Content, ReferencePrefix)
return filepath.FromSlash(refPath), nil
return strings.TrimPrefix(ref.Content, ReferencePrefix), nil
}
// resolves the name of the current branch from the reference file content. For example
@ -87,8 +88,6 @@ func (ref *Reference) CurrentBranch() (string, error) {
if err != nil {
return "", err
}
// normalize branch ref path to work accross different operating systems
branchRefPath = filepath.ToSlash(branchRefPath)
if !strings.HasPrefix(branchRefPath, HeadPathPrefix) {
return "", fmt.Errorf("reference path %s does not have expected prefix %s", branchRefPath, HeadPathPrefix)
}

View File

@ -6,6 +6,7 @@ import (
"strings"
"testing"
"github.com/databricks/cli/libs/vfs"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
@ -45,7 +46,7 @@ func TestReferenceReferencePathForReference(t *testing.T) {
}
path, err := ref.ResolvePath()
assert.NoError(t, err)
assert.Equal(t, filepath.FromSlash("refs/heads/my-branch"), path)
assert.Equal(t, "refs/heads/my-branch", path)
}
func TestReferenceLoadingForObjectID(t *testing.T) {
@ -55,7 +56,7 @@ func TestReferenceLoadingForObjectID(t *testing.T) {
defer f.Close()
f.WriteString(strings.Repeat("e", 40) + "\r\n")
ref, err := LoadReferenceFile(filepath.Join(tmp, "HEAD"))
ref, err := LoadReferenceFile(vfs.MustNew(tmp), "HEAD")
assert.NoError(t, err)
assert.Equal(t, ReferenceTypeSHA1, ref.Type)
assert.Equal(t, strings.Repeat("e", 40), ref.Content)
@ -68,7 +69,7 @@ func TestReferenceLoadingForReference(t *testing.T) {
defer f.Close()
f.WriteString("ref: refs/heads/foo\n")
ref, err := LoadReferenceFile(filepath.Join(tmp, "HEAD"))
ref, err := LoadReferenceFile(vfs.MustNew(tmp), "HEAD")
assert.NoError(t, err)
assert.Equal(t, ReferenceTypePointer, ref.Type)
assert.Equal(t, "ref: refs/heads/foo", ref.Content)
@ -81,7 +82,7 @@ func TestReferenceLoadingFailsForInvalidContent(t *testing.T) {
defer f.Close()
f.WriteString("abc")
_, err = LoadReferenceFile(filepath.Join(tmp, "HEAD"))
_, err = LoadReferenceFile(vfs.MustNew(tmp), "HEAD")
assert.ErrorContains(t, err, "unknown format for git HEAD")
}

View File

@ -7,7 +7,7 @@ import (
"path/filepath"
"strings"
"github.com/databricks/cli/libs/folders"
"github.com/databricks/cli/libs/vfs"
)
const gitIgnoreFileName = ".gitignore"
@ -21,8 +21,8 @@ type Repository struct {
// directory where we process .gitignore files.
real bool
// rootPath is the absolute path to the repository root.
rootPath string
// root is the absolute path to the repository root.
root vfs.Path
// ignore contains a list of ignore patterns indexed by the
// path prefix relative to the repository root.
@ -42,12 +42,12 @@ type Repository struct {
// Root returns the absolute path to the repository root.
func (r *Repository) Root() string {
return r.rootPath
return r.root.Native()
}
func (r *Repository) CurrentBranch() (string, error) {
// load .git/HEAD
ref, err := LoadReferenceFile(filepath.Join(r.rootPath, GitDirectoryName, "HEAD"))
ref, err := LoadReferenceFile(r.root, path.Join(GitDirectoryName, "HEAD"))
if err != nil {
return "", err
}
@ -64,7 +64,7 @@ func (r *Repository) CurrentBranch() (string, error) {
func (r *Repository) LatestCommit() (string, error) {
// load .git/HEAD
ref, err := LoadReferenceFile(filepath.Join(r.rootPath, GitDirectoryName, "HEAD"))
ref, err := LoadReferenceFile(r.root, path.Join(GitDirectoryName, "HEAD"))
if err != nil {
return "", err
}
@ -83,7 +83,7 @@ func (r *Repository) LatestCommit() (string, error) {
if err != nil {
return "", err
}
branchHeadRef, err := LoadReferenceFile(filepath.Join(r.rootPath, GitDirectoryName, branchHeadPath))
branchHeadRef, err := LoadReferenceFile(r.root, path.Join(GitDirectoryName, branchHeadPath))
if err != nil {
return "", err
}
@ -108,7 +108,7 @@ func (r *Repository) loadConfig() error {
if err != nil {
return fmt.Errorf("unable to load user specific gitconfig: %w", err)
}
err = config.loadFile(filepath.Join(r.rootPath, ".git/config"))
err = config.loadFile(r.root, ".git/config")
if err != nil {
return fmt.Errorf("unable to load repository specific gitconfig: %w", err)
}
@ -119,7 +119,7 @@ func (r *Repository) loadConfig() error {
// newIgnoreFile constructs a new [ignoreRules] implementation backed by
// a file using the specified path relative to the repository root.
func (r *Repository) newIgnoreFile(relativeIgnoreFilePath string) ignoreRules {
return newIgnoreFile(filepath.Join(r.rootPath, relativeIgnoreFilePath))
return newIgnoreFile(r.root, relativeIgnoreFilePath)
}
// getIgnoreRules returns a slice of [ignoreRules] that apply
@ -132,7 +132,7 @@ func (r *Repository) getIgnoreRules(prefix string) []ignoreRules {
return fs
}
r.ignore[prefix] = append(r.ignore[prefix], r.newIgnoreFile(filepath.Join(prefix, gitIgnoreFileName)))
r.ignore[prefix] = append(r.ignore[prefix], r.newIgnoreFile(path.Join(prefix, gitIgnoreFileName)))
return r.ignore[prefix]
}
@ -149,7 +149,7 @@ func (r *Repository) taintIgnoreRules() {
// Ignore computes whether to ignore the specified path.
// The specified path is relative to the repository root path.
func (r *Repository) Ignore(relPath string) (bool, error) {
parts := strings.Split(filepath.ToSlash(relPath), "/")
parts := strings.Split(relPath, "/")
// Retain trailing slash for directory patterns.
// We know a trailing slash was present if the last element
@ -186,14 +186,9 @@ func (r *Repository) Ignore(relPath string) (bool, error) {
return false, nil
}
func NewRepository(path string) (*Repository, error) {
path, err := filepath.Abs(path)
if err != nil {
return nil, err
}
func NewRepository(path vfs.Path) (*Repository, error) {
real := true
rootPath, err := folders.FindDirWithLeaf(path, GitDirectoryName)
rootPath, err := vfs.FindLeafInTree(path, GitDirectoryName)
if err != nil {
if !os.IsNotExist(err) {
return nil, err
@ -206,7 +201,7 @@ func NewRepository(path string) (*Repository, error) {
repo := &Repository{
real: real,
rootPath: rootPath,
root: rootPath,
ignore: make(map[string][]ignoreRules),
}
@ -221,13 +216,21 @@ func NewRepository(path string) (*Repository, error) {
return nil, fmt.Errorf("unable to access core excludes file: %w", err)
}
// Load global excludes on this machine.
// This is by definition a local path so we create a new [vfs.Path] instance.
coreExcludes := newStringIgnoreRules([]string{})
if coreExcludesPath != "" {
dir := filepath.Dir(coreExcludesPath)
base := filepath.Base(coreExcludesPath)
coreExcludes = newIgnoreFile(vfs.MustNew(dir), base)
}
// Initialize root ignore rules.
// These are special and not lazily initialized because:
// 1) we include a hardcoded ignore pattern
// 2) we include a gitignore file at a non-standard path
repo.ignore["."] = []ignoreRules{
// Load global excludes on this machine.
newIgnoreFile(coreExcludesPath),
coreExcludes,
// Always ignore root .git directory.
newStringIgnoreRules([]string{
".git",

View File

@ -7,6 +7,7 @@ import (
"strings"
"testing"
"github.com/databricks/cli/libs/vfs"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
@ -43,7 +44,7 @@ func newTestRepository(t *testing.T) *testRepository {
_, err = f2.WriteString(`ref: refs/heads/main`)
require.NoError(t, err)
repo, err := NewRepository(tmp)
repo, err := NewRepository(vfs.MustNew(tmp))
require.NoError(t, err)
return &testRepository{
@ -53,7 +54,7 @@ func newTestRepository(t *testing.T) *testRepository {
}
func (testRepo *testRepository) checkoutCommit(commitId string) {
f, err := os.OpenFile(filepath.Join(testRepo.r.rootPath, ".git", "HEAD"), os.O_WRONLY|os.O_TRUNC, os.ModePerm)
f, err := os.OpenFile(filepath.Join(testRepo.r.Root(), ".git", "HEAD"), os.O_WRONLY|os.O_TRUNC, os.ModePerm)
require.NoError(testRepo.t, err)
defer f.Close()
@ -63,7 +64,7 @@ func (testRepo *testRepository) checkoutCommit(commitId string) {
func (testRepo *testRepository) addBranch(name string, latestCommit string) {
// create dir for branch head reference
branchDir := filepath.Join(testRepo.r.rootPath, ".git", "refs", "heads")
branchDir := filepath.Join(testRepo.r.Root(), ".git", "refs", "heads")
err := os.MkdirAll(branchDir, os.ModePerm)
require.NoError(testRepo.t, err)
@ -78,7 +79,7 @@ func (testRepo *testRepository) addBranch(name string, latestCommit string) {
}
func (testRepo *testRepository) checkoutBranch(name string) {
f, err := os.OpenFile(filepath.Join(testRepo.r.rootPath, ".git", "HEAD"), os.O_WRONLY|os.O_TRUNC, os.ModePerm)
f, err := os.OpenFile(filepath.Join(testRepo.r.Root(), ".git", "HEAD"), os.O_WRONLY|os.O_TRUNC, os.ModePerm)
require.NoError(testRepo.t, err)
defer f.Close()
@ -89,7 +90,7 @@ func (testRepo *testRepository) checkoutBranch(name string) {
// add remote origin url to test repo
func (testRepo *testRepository) addOriginUrl(url string) {
// open config in append mode
f, err := os.OpenFile(filepath.Join(testRepo.r.rootPath, ".git", "config"), os.O_WRONLY|os.O_APPEND, os.ModePerm)
f, err := os.OpenFile(filepath.Join(testRepo.r.Root(), ".git", "config"), os.O_WRONLY|os.O_APPEND, os.ModePerm)
require.NoError(testRepo.t, err)
defer f.Close()
@ -128,7 +129,7 @@ func (testRepo *testRepository) assertOriginUrl(expected string) {
func TestRepository(t *testing.T) {
// Load this repository as test.
repo, err := NewRepository("../..")
repo, err := NewRepository(vfs.MustNew("../.."))
tr := testRepository{t, repo}
require.NoError(t, err)
@ -142,7 +143,7 @@ func TestRepository(t *testing.T) {
assert.True(t, tr.Ignore("vendor/"))
// Check that ignores under testdata work.
assert.True(t, tr.Ignore(filepath.Join("libs", "git", "testdata", "root.ignoreme")))
assert.True(t, tr.Ignore("libs/git/testdata/root.ignoreme"))
}
func TestRepositoryGitConfigForEmptyRepo(t *testing.T) {
@ -192,7 +193,7 @@ func TestRepositoryGitConfigForSshUrl(t *testing.T) {
func TestRepositoryGitConfigWhenNotARepo(t *testing.T) {
tmp := t.TempDir()
repo, err := NewRepository(tmp)
repo, err := NewRepository(vfs.MustNew(tmp))
require.NoError(t, err)
branch, err := repo.CurrentBranch()

View File

@ -1,9 +1,13 @@
package git
import (
"fmt"
"os"
"path"
"path/filepath"
"strings"
"github.com/databricks/cli/libs/vfs"
)
// View represents a view on a directory tree that takes into account
@ -29,17 +33,15 @@ type View struct {
// Ignore computes whether to ignore the specified path.
// The specified path is relative to the view's target path.
func (v *View) Ignore(path string) (bool, error) {
path = filepath.ToSlash(path)
func (v *View) Ignore(relPath string) (bool, error) {
// Retain trailing slash for directory patterns.
// Needs special handling because it is removed by path cleaning.
trailingSlash := ""
if strings.HasSuffix(path, "/") {
if strings.HasSuffix(relPath, "/") {
trailingSlash = "/"
}
return v.repo.Ignore(filepath.Join(v.targetPath, path) + trailingSlash)
return v.repo.Ignore(path.Join(v.targetPath, relPath) + trailingSlash)
}
// IgnoreFile returns if the gitignore rules in this fileset
@ -70,26 +72,27 @@ func (v *View) IgnoreDirectory(dir string) (bool, error) {
return v.Ignore(dir + "/")
}
func NewView(path string) (*View, error) {
path, err := filepath.Abs(path)
if err != nil {
return nil, err
}
repo, err := NewRepository(path)
func NewView(root vfs.Path) (*View, error) {
repo, err := NewRepository(root)
if err != nil {
return nil, err
}
// Target path must be relative to the repository root path.
targetPath, err := filepath.Rel(repo.rootPath, path)
if err != nil {
return nil, err
target := root.Native()
prefix := repo.root.Native()
if !strings.HasPrefix(target, prefix) {
return nil, fmt.Errorf("path %q is not within repository root %q", root.Native(), prefix)
}
// Make target a relative path.
target = strings.TrimPrefix(target, prefix)
target = strings.TrimPrefix(target, string(os.PathSeparator))
target = path.Clean(filepath.ToSlash(target))
return &View{
repo: repo,
targetPath: targetPath,
targetPath: target,
}, nil
}

View File

@ -7,6 +7,7 @@ import (
"path/filepath"
"testing"
"github.com/databricks/cli/libs/vfs"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
@ -89,19 +90,19 @@ func testViewAtRoot(t *testing.T, tv testView) {
}
func TestViewRootInBricksRepo(t *testing.T) {
v, err := NewView("./testdata")
v, err := NewView(vfs.MustNew("./testdata"))
require.NoError(t, err)
testViewAtRoot(t, testView{t, v})
}
func TestViewRootInTempRepo(t *testing.T) {
v, err := NewView(createFakeRepo(t, "testdata"))
v, err := NewView(vfs.MustNew(createFakeRepo(t, "testdata")))
require.NoError(t, err)
testViewAtRoot(t, testView{t, v})
}
func TestViewRootInTempDir(t *testing.T) {
v, err := NewView(copyTestdata(t, "testdata"))
v, err := NewView(vfs.MustNew(copyTestdata(t, "testdata")))
require.NoError(t, err)
testViewAtRoot(t, testView{t, v})
}
@ -124,20 +125,20 @@ func testViewAtA(t *testing.T, tv testView) {
}
func TestViewAInBricksRepo(t *testing.T) {
v, err := NewView("./testdata/a")
v, err := NewView(vfs.MustNew("./testdata/a"))
require.NoError(t, err)
testViewAtA(t, testView{t, v})
}
func TestViewAInTempRepo(t *testing.T) {
v, err := NewView(filepath.Join(createFakeRepo(t, "testdata"), "a"))
v, err := NewView(vfs.MustNew(filepath.Join(createFakeRepo(t, "testdata"), "a")))
require.NoError(t, err)
testViewAtA(t, testView{t, v})
}
func TestViewAInTempDir(t *testing.T) {
// Since this is not a fake repo it should not traverse up the tree.
v, err := NewView(filepath.Join(copyTestdata(t, "testdata"), "a"))
v, err := NewView(vfs.MustNew(filepath.Join(copyTestdata(t, "testdata"), "a")))
require.NoError(t, err)
tv := testView{t, v}
@ -174,20 +175,20 @@ func testViewAtAB(t *testing.T, tv testView) {
}
func TestViewABInBricksRepo(t *testing.T) {
v, err := NewView("./testdata/a/b")
v, err := NewView(vfs.MustNew("./testdata/a/b"))
require.NoError(t, err)
testViewAtAB(t, testView{t, v})
}
func TestViewABInTempRepo(t *testing.T) {
v, err := NewView(filepath.Join(createFakeRepo(t, "testdata"), "a", "b"))
v, err := NewView(vfs.MustNew(filepath.Join(createFakeRepo(t, "testdata"), "a", "b")))
require.NoError(t, err)
testViewAtAB(t, testView{t, v})
}
func TestViewABInTempDir(t *testing.T) {
// Since this is not a fake repo it should not traverse up the tree.
v, err := NewView(filepath.Join(copyTestdata(t, "testdata"), "a", "b"))
v, err := NewView(vfs.MustNew(filepath.Join(copyTestdata(t, "testdata"), "a", "b")))
tv := testView{t, v}
require.NoError(t, err)
@ -214,7 +215,7 @@ func TestViewDoesNotChangeGitignoreIfCacheDirAlreadyIgnoredAtRoot(t *testing.T)
// Since root .gitignore already has .databricks, there should be no edits
// to root .gitignore
v, err := NewView(repoPath)
v, err := NewView(vfs.MustNew(repoPath))
require.NoError(t, err)
err = v.EnsureValidGitIgnoreExists()
@ -234,7 +235,7 @@ func TestViewDoesNotChangeGitignoreIfCacheDirAlreadyIgnoredInSubdir(t *testing.T
// Since root .gitignore already has .databricks, there should be no edits
// to a/.gitignore
v, err := NewView(filepath.Join(repoPath, "a"))
v, err := NewView(vfs.MustNew(filepath.Join(repoPath, "a")))
require.NoError(t, err)
err = v.EnsureValidGitIgnoreExists()
@ -252,7 +253,7 @@ func TestViewAddsGitignoreWithCacheDir(t *testing.T) {
assert.NoError(t, err)
// Since root .gitignore was deleted, new view adds .databricks to root .gitignore
v, err := NewView(repoPath)
v, err := NewView(vfs.MustNew(repoPath))
require.NoError(t, err)
err = v.EnsureValidGitIgnoreExists()
@ -270,7 +271,7 @@ func TestViewAddsGitignoreWithCacheDirAtSubdir(t *testing.T) {
require.NoError(t, err)
// Since root .gitignore was deleted, new view adds .databricks to a/.gitignore
v, err := NewView(filepath.Join(repoPath, "a"))
v, err := NewView(vfs.MustNew(filepath.Join(repoPath, "a")))
require.NoError(t, err)
err = v.EnsureValidGitIgnoreExists()
@ -287,7 +288,7 @@ func TestViewAddsGitignoreWithCacheDirAtSubdir(t *testing.T) {
func TestViewAlwaysIgnoresCacheDir(t *testing.T) {
repoPath := createFakeRepo(t, "testdata")
v, err := NewView(repoPath)
v, err := NewView(vfs.MustNew(repoPath))
require.NoError(t, err)
err = v.EnsureValidGitIgnoreExists()

View File

@ -4,6 +4,7 @@ import (
"bufio"
"bytes"
"io"
"io/fs"
"os"
"path/filepath"
"strings"
@ -15,8 +16,8 @@ import (
const headerLength = 32
// readHeader reads the first N bytes from a file.
func readHeader(path string) ([]byte, error) {
f, err := os.Open(path)
func readHeader(fsys fs.FS, name string) ([]byte, error) {
f, err := fsys.Open(name)
if err != nil {
return nil, err
}
@ -36,10 +37,10 @@ func readHeader(path string) ([]byte, error) {
// Detect returns whether the file at path is a Databricks notebook.
// If it is, it returns the notebook language.
func Detect(path string) (notebook bool, language workspace.Language, err error) {
func DetectWithFS(fsys fs.FS, name string) (notebook bool, language workspace.Language, err error) {
header := ""
buf, err := readHeader(path)
buf, err := readHeader(fsys, name)
if err != nil {
return false, "", err
}
@ -48,7 +49,7 @@ func Detect(path string) (notebook bool, language workspace.Language, err error)
fileHeader := scanner.Text()
// Determine which header to expect based on filename extension.
ext := strings.ToLower(filepath.Ext(path))
ext := strings.ToLower(filepath.Ext(name))
switch ext {
case ".py":
header = `# Databricks notebook source`
@ -63,7 +64,7 @@ func Detect(path string) (notebook bool, language workspace.Language, err error)
header = "-- Databricks notebook source"
language = workspace.LanguageSql
case ".ipynb":
return DetectJupyter(path)
return DetectJupyterWithFS(fsys, name)
default:
return false, "", nil
}
@ -74,3 +75,11 @@ func Detect(path string) (notebook bool, language workspace.Language, err error)
return true, language, nil
}
// Detect calls DetectWithFS with the local filesystem.
// The name argument may be a local relative path or a local absolute path.
func Detect(name string) (notebook bool, language workspace.Language, err error) {
d := filepath.ToSlash(filepath.Dir(name))
b := filepath.Base(name)
return DetectWithFS(os.DirFS(d), b)
}

View File

@ -3,7 +3,9 @@ package notebook
import (
"encoding/json"
"fmt"
"io/fs"
"os"
"path/filepath"
"github.com/databricks/databricks-sdk-go/service/workspace"
)
@ -56,8 +58,8 @@ func resolveLanguage(nb *jupyter) workspace.Language {
// DetectJupyter returns whether the file at path is a valid Jupyter notebook.
// We assume it is valid if we can read it as JSON and see a couple expected fields.
// If we cannot, importing into the workspace will always fail, so we also return an error.
func DetectJupyter(path string) (notebook bool, language workspace.Language, err error) {
f, err := os.Open(path)
func DetectJupyterWithFS(fsys fs.FS, name string) (notebook bool, language workspace.Language, err error) {
f, err := fsys.Open(name)
if err != nil {
return false, "", err
}
@ -68,18 +70,26 @@ func DetectJupyter(path string) (notebook bool, language workspace.Language, err
dec := json.NewDecoder(f)
err = dec.Decode(&nb)
if err != nil {
return false, "", fmt.Errorf("%s: error loading Jupyter notebook file: %w", path, err)
return false, "", fmt.Errorf("%s: error loading Jupyter notebook file: %w", name, err)
}
// Not a Jupyter notebook if the cells or metadata fields aren't defined.
if nb.Cells == nil || nb.Metadata == nil {
return false, "", fmt.Errorf("%s: invalid Jupyter notebook file", path)
return false, "", fmt.Errorf("%s: invalid Jupyter notebook file", name)
}
// Major version must be at least 4.
if nb.NbFormatMajor < 4 {
return false, "", fmt.Errorf("%s: unsupported Jupyter notebook version: %d", path, nb.NbFormatMajor)
return false, "", fmt.Errorf("%s: unsupported Jupyter notebook version: %d", name, nb.NbFormatMajor)
}
return true, resolveLanguage(&nb), nil
}
// DetectJupyter calls DetectJupyterWithFS with the local filesystem.
// The name argument may be a local relative path or a local absolute path.
func DetectJupyter(name string) (notebook bool, language workspace.Language, err error) {
d := filepath.ToSlash(filepath.Dir(name))
b := filepath.Base(name)
return DetectJupyterWithFS(os.DirFS(d), b)
}

View File

@ -2,7 +2,6 @@ package sync
import (
"path"
"path/filepath"
"golang.org/x/exp/maps"
)
@ -64,7 +63,7 @@ func (d *diff) addFilesWithRemoteNameChanged(after *SnapshotState, before *Snaps
func (d *diff) addNewFiles(after *SnapshotState, before *SnapshotState) {
for localName := range after.LastModifiedTimes {
if _, ok := before.LastModifiedTimes[localName]; !ok {
d.put = append(d.put, filepath.ToSlash(localName))
d.put = append(d.put, localName)
}
}
@ -79,7 +78,7 @@ func (d *diff) addUpdatedFiles(after *SnapshotState, before *SnapshotState) {
for localName, modTime := range after.LastModifiedTimes {
prevModTime, ok := before.LastModifiedTimes[localName]
if ok && modTime.After(prevModTime) {
d.put = append(d.put, filepath.ToSlash(localName))
d.put = append(d.put, localName)
}
}
}

View File

@ -2,7 +2,6 @@ package sync
import (
"path"
"path/filepath"
"sort"
)
@ -16,8 +15,8 @@ func MakeDirSet(files []string) DirSet {
// Iterate over all files.
for _, f := range files {
// Get the directory of the file in /-separated form.
dir := filepath.ToSlash(filepath.Dir(f))
// Get the directory of the file.
dir := path.Dir(f)
// Add this directory and its parents until it is either "." or already in the set.
for dir != "." {

View File

@ -172,6 +172,11 @@ func loadOrNewSnapshot(ctx context.Context, opts *SyncOptions) (*Snapshot, error
return nil, fmt.Errorf("failed to json unmarshal persisted snapshot: %s", err)
}
// Ensure that all paths are slash-separated upon loading
// an existing snapshot file. If it was created by an older
// CLI version (<= v0.220.0), it may contain backslashes.
snapshot.SnapshotState = snapshot.SnapshotState.ToSlash()
snapshot.New = false
return snapshot, nil
}

View File

@ -2,6 +2,7 @@ package sync
import (
"fmt"
"path"
"path/filepath"
"strings"
"time"
@ -48,7 +49,7 @@ func NewSnapshotState(localFiles []fileset.File) (*SnapshotState, error) {
for k := range localFiles {
f := &localFiles[k]
// Compute the remote name the file will have in WSFS
remoteName := filepath.ToSlash(f.Relative)
remoteName := f.Relative
isNotebook, err := f.IsNotebook()
if err != nil {
@ -57,7 +58,7 @@ func NewSnapshotState(localFiles []fileset.File) (*SnapshotState, error) {
continue
}
if isNotebook {
ext := filepath.Ext(remoteName)
ext := path.Ext(remoteName)
remoteName = strings.TrimSuffix(remoteName, ext)
}
@ -119,3 +120,30 @@ func (fs *SnapshotState) validate() error {
}
return nil
}
// ToSlash ensures all local paths in the snapshot state
// are slash-separated. Returns a new snapshot state.
func (old SnapshotState) ToSlash() *SnapshotState {
new := SnapshotState{
LastModifiedTimes: make(map[string]time.Time),
LocalToRemoteNames: make(map[string]string),
RemoteToLocalNames: make(map[string]string),
}
// Keys are local paths.
for k, v := range old.LastModifiedTimes {
new.LastModifiedTimes[filepath.ToSlash(k)] = v
}
// Keys are local paths.
for k, v := range old.LocalToRemoteNames {
new.LocalToRemoteNames[filepath.ToSlash(k)] = v
}
// Values are remote paths.
for k, v := range old.RemoteToLocalNames {
new.RemoteToLocalNames[k] = filepath.ToSlash(v)
}
return &new
}

View File

@ -1,25 +1,27 @@
package sync
import (
"runtime"
"testing"
"time"
"github.com/databricks/cli/libs/fileset"
"github.com/databricks/cli/libs/vfs"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestSnapshotState(t *testing.T) {
fileSet := fileset.New("./testdata/sync-fileset")
fileSet := fileset.New(vfs.MustNew("./testdata/sync-fileset"))
files, err := fileSet.All()
require.NoError(t, err)
// Assert initial contents of the fileset
assert.Len(t, files, 4)
assert.Equal(t, "invalid-nb.ipynb", files[0].Name())
assert.Equal(t, "my-nb.py", files[1].Name())
assert.Equal(t, "my-script.py", files[2].Name())
assert.Equal(t, "valid-nb.ipynb", files[3].Name())
assert.Equal(t, "invalid-nb.ipynb", files[0].Relative)
assert.Equal(t, "my-nb.py", files[1].Relative)
assert.Equal(t, "my-script.py", files[2].Relative)
assert.Equal(t, "valid-nb.ipynb", files[3].Relative)
// Assert snapshot state generated from the fileset. Note that the invalid notebook
// has been ignored.
@ -114,3 +116,30 @@ func TestSnapshotStateValidationErrors(t *testing.T) {
}
assert.EqualError(t, s.validate(), "invalid sync state representation. Inconsistent values found. Remote file c points to a. Local file a points to b")
}
func TestSnapshotStateWithBackslashes(t *testing.T) {
if runtime.GOOS != "windows" {
t.Skip("Skipping test on non-Windows platform")
}
now := time.Now()
s1 := &SnapshotState{
LastModifiedTimes: map[string]time.Time{
"foo\\bar.py": now,
},
LocalToRemoteNames: map[string]string{
"foo\\bar.py": "foo/bar",
},
RemoteToLocalNames: map[string]string{
"foo/bar": "foo\\bar.py",
},
}
assert.NoError(t, s1.validate())
s2 := s1.ToSlash()
assert.NoError(t, s1.validate())
assert.Equal(t, map[string]time.Time{"foo/bar.py": now}, s2.LastModifiedTimes)
assert.Equal(t, map[string]string{"foo/bar.py": "foo/bar"}, s2.LocalToRemoteNames)
assert.Equal(t, map[string]string{"foo/bar": "foo/bar.py"}, s2.RemoteToLocalNames)
}

View File

@ -10,6 +10,7 @@ import (
"github.com/databricks/cli/libs/git"
"github.com/databricks/cli/libs/testfile"
"github.com/databricks/cli/libs/vfs"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
@ -29,7 +30,7 @@ func TestDiff(t *testing.T) {
// Create temp project dir
projectDir := t.TempDir()
fileSet, err := git.NewFileSet(projectDir)
fileSet, err := git.NewFileSet(vfs.MustNew(projectDir))
require.NoError(t, err)
state := Snapshot{
SnapshotState: &SnapshotState{
@ -93,7 +94,7 @@ func TestSymlinkDiff(t *testing.T) {
// Create temp project dir
projectDir := t.TempDir()
fileSet, err := git.NewFileSet(projectDir)
fileSet, err := git.NewFileSet(vfs.MustNew(projectDir))
require.NoError(t, err)
state := Snapshot{
SnapshotState: &SnapshotState{
@ -124,7 +125,7 @@ func TestFolderDiff(t *testing.T) {
// Create temp project dir
projectDir := t.TempDir()
fileSet, err := git.NewFileSet(projectDir)
fileSet, err := git.NewFileSet(vfs.MustNew(projectDir))
require.NoError(t, err)
state := Snapshot{
SnapshotState: &SnapshotState{
@ -169,7 +170,7 @@ func TestPythonNotebookDiff(t *testing.T) {
// Create temp project dir
projectDir := t.TempDir()
fileSet, err := git.NewFileSet(projectDir)
fileSet, err := git.NewFileSet(vfs.MustNew(projectDir))
require.NoError(t, err)
state := Snapshot{
SnapshotState: &SnapshotState{
@ -244,7 +245,7 @@ func TestErrorWhenIdenticalRemoteName(t *testing.T) {
// Create temp project dir
projectDir := t.TempDir()
fileSet, err := git.NewFileSet(projectDir)
fileSet, err := git.NewFileSet(vfs.MustNew(projectDir))
require.NoError(t, err)
state := Snapshot{
SnapshotState: &SnapshotState{
@ -281,7 +282,7 @@ func TestNoErrorRenameWithIdenticalRemoteName(t *testing.T) {
// Create temp project dir
projectDir := t.TempDir()
fileSet, err := git.NewFileSet(projectDir)
fileSet, err := git.NewFileSet(vfs.MustNew(projectDir))
require.NoError(t, err)
state := Snapshot{
SnapshotState: &SnapshotState{

View File

@ -10,12 +10,13 @@ import (
"github.com/databricks/cli/libs/git"
"github.com/databricks/cli/libs/log"
"github.com/databricks/cli/libs/set"
"github.com/databricks/cli/libs/vfs"
"github.com/databricks/databricks-sdk-go"
"github.com/databricks/databricks-sdk-go/service/iam"
)
type SyncOptions struct {
LocalPath string
LocalPath vfs.Path
RemotePath string
Include []string
Exclude []string
@ -54,6 +55,7 @@ func New(ctx context.Context, opts SyncOptions) (*Sync, error) {
if err != nil {
return nil, err
}
err = fileSet.EnsureValidGitIgnoreExists()
if err != nil {
return nil, err
@ -186,7 +188,7 @@ func (s *Sync) GetFileList(ctx context.Context) ([]fileset.File, error) {
// tradeoff: doing portable monitoring only due to macOS max descriptor manual ulimit setting requirement
// https://github.com/gorakhargosh/watchdog/blob/master/src/watchdog/observers/kqueue.py#L394-L418
all := set.NewSetF(func(f fileset.File) string {
return f.Absolute
return f.Relative
})
gitFiles, err := s.fileSet.All()
if err != nil {

View File

@ -8,6 +8,7 @@ import (
"github.com/databricks/cli/libs/fileset"
"github.com/databricks/cli/libs/git"
"github.com/databricks/cli/libs/vfs"
"github.com/stretchr/testify/require"
)
@ -73,16 +74,17 @@ func TestGetFileSet(t *testing.T) {
ctx := context.Background()
dir := setupFiles(t)
fileSet, err := git.NewFileSet(dir)
root := vfs.MustNew(dir)
fileSet, err := git.NewFileSet(root)
require.NoError(t, err)
err = fileSet.EnsureValidGitIgnoreExists()
require.NoError(t, err)
inc, err := fileset.NewGlobSet(dir, []string{})
inc, err := fileset.NewGlobSet(root, []string{})
require.NoError(t, err)
excl, err := fileset.NewGlobSet(dir, []string{})
excl, err := fileset.NewGlobSet(root, []string{})
require.NoError(t, err)
s := &Sync{
@ -97,10 +99,10 @@ func TestGetFileSet(t *testing.T) {
require.NoError(t, err)
require.Equal(t, len(fileList), 9)
inc, err = fileset.NewGlobSet(dir, []string{})
inc, err = fileset.NewGlobSet(root, []string{})
require.NoError(t, err)
excl, err = fileset.NewGlobSet(dir, []string{"*.go"})
excl, err = fileset.NewGlobSet(root, []string{"*.go"})
require.NoError(t, err)
s = &Sync{
@ -115,10 +117,10 @@ func TestGetFileSet(t *testing.T) {
require.NoError(t, err)
require.Equal(t, len(fileList), 1)
inc, err = fileset.NewGlobSet(dir, []string{".databricks/*"})
inc, err = fileset.NewGlobSet(root, []string{".databricks/*"})
require.NoError(t, err)
excl, err = fileset.NewGlobSet(dir, []string{})
excl, err = fileset.NewGlobSet(root, []string{})
require.NoError(t, err)
s = &Sync{
@ -138,16 +140,17 @@ func TestRecursiveExclude(t *testing.T) {
ctx := context.Background()
dir := setupFiles(t)
fileSet, err := git.NewFileSet(dir)
root := vfs.MustNew(dir)
fileSet, err := git.NewFileSet(root)
require.NoError(t, err)
err = fileSet.EnsureValidGitIgnoreExists()
require.NoError(t, err)
inc, err := fileset.NewGlobSet(dir, []string{})
inc, err := fileset.NewGlobSet(root, []string{})
require.NoError(t, err)
excl, err := fileset.NewGlobSet(dir, []string{"test/**"})
excl, err := fileset.NewGlobSet(root, []string{"test/**"})
require.NoError(t, err)
s := &Sync{

View File

@ -4,8 +4,6 @@ import (
"context"
"errors"
"io/fs"
"os"
"path/filepath"
"github.com/databricks/cli/libs/filer"
"github.com/databricks/cli/libs/log"
@ -59,7 +57,7 @@ func (s *Sync) applyMkdir(ctx context.Context, localName string) error {
func (s *Sync) applyPut(ctx context.Context, localName string) error {
s.notifyProgress(ctx, EventActionPut, localName, 0.0)
localFile, err := os.Open(filepath.Join(s.LocalPath, localName))
localFile, err := s.LocalPath.Open(localName)
if err != nil {
return err
}

29
libs/vfs/leaf.go Normal file
View File

@ -0,0 +1,29 @@
package vfs
import (
"errors"
"io/fs"
)
// FindLeafInTree returns the first path that holds `name`,
// traversing up to the root of the filesystem, starting at `p`.
func FindLeafInTree(p Path, name string) (Path, error) {
for p != nil {
_, err := fs.Stat(p, name)
// No error means we found the leaf in p.
if err == nil {
return p, nil
}
// ErrNotExist means we continue traversal up the tree.
if errors.Is(err, fs.ErrNotExist) {
p = p.Parent()
continue
}
return nil, err
}
return nil, fs.ErrNotExist
}

38
libs/vfs/leaf_test.go Normal file
View File

@ -0,0 +1,38 @@
package vfs
import (
"os"
"path/filepath"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestFindLeafInTree(t *testing.T) {
wd, err := os.Getwd()
require.NoError(t, err)
root := filepath.Join(wd, "..", "..")
// Find from working directory should work.
{
out, err := FindLeafInTree(MustNew(wd), ".git")
assert.NoError(t, err)
assert.Equal(t, root, out.Native())
}
// Find from project root itself should work.
{
out, err := FindLeafInTree(MustNew(root), ".git")
assert.NoError(t, err)
assert.Equal(t, root, out.Native())
}
// Find for something that doesn't exist should work.
{
out, err := FindLeafInTree(MustNew(root), "this-leaf-doesnt-exist-anywhere")
assert.ErrorIs(t, err, os.ErrNotExist)
assert.Equal(t, nil, out)
}
}

82
libs/vfs/os.go Normal file
View File

@ -0,0 +1,82 @@
package vfs
import (
"io/fs"
"os"
"path/filepath"
)
type osPath struct {
path string
openFn func(name string) (fs.File, error)
statFn func(name string) (fs.FileInfo, error)
readDirFn func(name string) ([]fs.DirEntry, error)
readFileFn func(name string) ([]byte, error)
}
func New(name string) (Path, error) {
abs, err := filepath.Abs(name)
if err != nil {
return nil, err
}
return newOsPath(abs), nil
}
func MustNew(name string) Path {
p, err := New(name)
if err != nil {
panic(err)
}
return p
}
func newOsPath(name string) Path {
if !filepath.IsAbs(name) {
panic("vfs: abs path must be absolute")
}
// [os.DirFS] implements all required interfaces.
// We used type assertion below to get the underlying types.
dirfs := os.DirFS(name)
return &osPath{
path: name,
openFn: dirfs.Open,
statFn: dirfs.(fs.StatFS).Stat,
readDirFn: dirfs.(fs.ReadDirFS).ReadDir,
readFileFn: dirfs.(fs.ReadFileFS).ReadFile,
}
}
func (o osPath) Open(name string) (fs.File, error) {
return o.openFn(name)
}
func (o osPath) Stat(name string) (fs.FileInfo, error) {
return o.statFn(name)
}
func (o osPath) ReadDir(name string) ([]fs.DirEntry, error) {
return o.readDirFn(name)
}
func (o osPath) ReadFile(name string) ([]byte, error) {
return o.readFileFn(name)
}
func (o osPath) Parent() Path {
dir := filepath.Dir(o.path)
if dir == o.path {
return nil
}
return newOsPath(dir)
}
func (o osPath) Native() string {
return o.path
}

54
libs/vfs/os_test.go Normal file
View File

@ -0,0 +1,54 @@
package vfs
import (
"os"
"path/filepath"
"runtime"
"testing"
"github.com/stretchr/testify/require"
)
func TestOsNewWithRelativePath(t *testing.T) {
wd, err := os.Getwd()
require.NoError(t, err)
p, err := New(".")
require.NoError(t, err)
require.Equal(t, wd, p.Native())
}
func TestOsPathParent(t *testing.T) {
wd, err := os.Getwd()
require.NoError(t, err)
p := MustNew(wd)
require.NotNil(t, p)
// Traverse all the way to the root.
for {
q := p.Parent()
if q == nil {
// Parent returns nil when it is the root.
break
}
p = q
}
// We should have reached the root.
if runtime.GOOS == "windows" {
require.Equal(t, filepath.VolumeName(wd)+`\`, p.Native())
} else {
require.Equal(t, "/", p.Native())
}
}
func TestOsPathNative(t *testing.T) {
wd, err := os.Getwd()
require.NoError(t, err)
p := MustNew(wd)
require.NotNil(t, p)
require.Equal(t, wd, p.Native())
}

29
libs/vfs/path.go Normal file
View File

@ -0,0 +1,29 @@
package vfs
import "io/fs"
// FS combines the fs.FS, fs.StatFS, fs.ReadDirFS, and fs.ReadFileFS interfaces.
// It mandates that Path implementations must support all these interfaces.
type FS interface {
fs.FS
fs.StatFS
fs.ReadDirFS
fs.ReadFileFS
}
// Path defines a read-only virtual file system interface for:
//
// 1. Intercepting file operations to inject custom logic (e.g., logging, access control).
// 2. Traversing directories to find specific leaf directories (e.g., .git).
// 3. Converting virtual paths to OS-native paths.
//
// Options 2 and 3 are not possible with the standard fs.FS interface.
// They are needed such that we can provide an instance to the sync package
// and still detect the containing .git directory and convert paths to native paths.
type Path interface {
FS
Parent() Path
Native() string
}

1
libs/vfs/path_test.go Normal file
View File

@ -0,0 +1 @@
package vfs