databricks-cli/libs/fileset/fileset.go

167 lines
4.1 KiB
Go

package fileset
import (
"fmt"
"io/fs"
"os"
pathlib "path"
"path/filepath"
"slices"
"github.com/databricks/cli/libs/vfs"
)
// FileSet facilitates recursive file listing for paths rooted at a given directory.
// It optionally takes into account ignore rules through the [Ignorer] interface.
//
// Construct a FileSet with [New]; it fills in the default path list and installs
// a no-op ignorer.
type FileSet struct {
// Root path of the fileset. It is the file system that gets walked, so all
// entries in paths are interpreted relative to it.
root vfs.Path
// Paths to include in the fileset.
// Files are included as-is (if not ignored) and directories are traversed recursively.
// Defaults to []string{"."} if not specified.
// [New] stores these cleaned, slash-separated and without duplicates.
paths []string
// Ignorer interface to check if a file or directory should be ignored.
// [New] sets this to nopIgnorer{}; replace it with [FileSet.SetIgnorer].
ignore Ignorer
}
// New constructs a [FileSet] rooted at root.
//
// Each element of args is a list of paths, relative to root, to include in the
// fileset. Every path is converted to forward slashes and cleaned, and only the
// first occurrence of each resulting path is kept, in the order given.
//
// When New is called without any args the fileset covers the whole root (".").
// The default applies only when no argument is passed at all: an explicitly
// empty list yields a fileset with no paths.
//
// The returned fileset starts out with a no-op ignorer.
func New(root vfs.Path, args ...[]string) *FileSet {
	// No path lists supplied: fall back to listing everything under root.
	if len(args) == 0 {
		args = [][]string{{"."}}
	}

	// Normalize each path and keep it only the first time it shows up.
	var unique []string
	for i := range args {
		for _, raw := range args[i] {
			cleaned := pathlib.Clean(filepath.ToSlash(raw))
			if !slices.Contains(unique, cleaned) {
				unique = append(unique, cleaned)
			}
		}
	}

	fset := &FileSet{
		root:   root,
		paths:  unique,
		ignore: nopIgnorer{},
	}
	return fset
}
// Ignorer returns the [FileSet]'s current ignorer, i.e. the value most recently
// installed through [FileSet.SetIgnorer] or the one set by [New].
func (w *FileSet) Ignorer() Ignorer {
return w.ignore
}
// SetIgnorer sets the [Ignorer] interface for this [FileSet].
// The given ignorer replaces the current one and is consulted for every file
// and directory visited by subsequent listings.
func (w *FileSet) SetIgnorer(ignore Ignorer) {
w.ignore = ignore
}
// Files performs a recursive listing of every configured path and returns the
// files it finds that are not ignored.
//
// The returned slice does not contain duplicates: a single set of already
// listed names is shared across all configured paths.
// Paths are processed in their configured order, so the order of files in the
// slice is stable.
//
// If listing any path fails, Files returns a nil slice and that error.
func (w *FileSet) Files() (out []File, err error) {
	// Names already emitted, shared across paths so overlapping paths
	// do not produce the same file twice.
	visited := map[string]struct{}{}

	for i := range w.paths {
		found, listErr := w.recursiveListFiles(w.paths[i], visited)
		if listErr != nil {
			return nil, listErr
		}
		out = append(out, found...)
	}

	return out, nil
}
// recursiveListFiles walks path (relative to w.root) depth first and returns
// all files that are tracked by the FileSet, i.e. not ignored by w.ignore.
//
// seen holds the names that were already listed; names in it are skipped and
// every file added to the result is recorded in it, so the same map can be
// shared across calls to avoid duplicates.
//
// Directories for which the ignorer returns true are skipped entirely.
// Regular files are listed as-is. Symlinks are listed under the link's own
// name when they resolve to a regular file; links that cannot be resolved or
// that point at anything else (for example a directory) are skipped.
//
// On failure the result slice is nil and the error is returned.
func (w *FileSet) recursiveListFiles(path string, seen map[string]struct{}) (out []File, err error) {
	// wanted reports whether name should be added to the result: it must not
	// be ignored and must not have been listed before.
	wanted := func(name string) (bool, error) {
		ign, err := w.ignore.IgnoreFile(name)
		if err != nil {
			return false, fmt.Errorf("cannot check if %s should be ignored: %w", name, err)
		}
		if ign {
			return false, nil
		}
		_, dup := seen[name]
		return !dup, nil
	}

	err = fs.WalkDir(w.root, path, func(name string, d fs.DirEntry, err error) error {
		if err != nil {
			return err
		}

		info, err := d.Info()
		if err != nil {
			return err
		}

		switch mode := info.Mode(); {
		case mode.IsDir():
			ign, err := w.ignore.IgnoreDirectory(name)
			if err != nil {
				return fmt.Errorf("cannot check if %s should be ignored: %w", name, err)
			}
			if ign {
				return fs.SkipDir
			}

		case mode.IsRegular():
			ok, err := wanted(name)
			if err != nil || !ok {
				return err
			}
			seen[name] = struct{}{}
			out = append(out, NewFile(w.root, d, name))

		// Special case handling for symlinks
		case mode&os.ModeSymlink != 0:
			ok, err := wanted(name)
			if err != nil || !ok {
				return err
			}

			// name is slash-separated and relative to w.root, whereas
			// filepath.EvalSymlinks and os.Stat resolve relative paths against
			// the process working directory. Anchor the link on the root first.
			// NOTE(review): relies on vfs.Path.Native returning the root's OS
			// path — confirm against the vfs package.
			link := filepath.Join(w.root.Native(), filepath.FromSlash(name))
			target, err := filepath.EvalSymlinks(link)
			if err != nil {
				// eat this error if it happens since it is most likely to be hit for symlink
				// that points to another one. Since symlinks were never supported before this
				// would just keep the same behavior.
				return nil
			}

			targetInfo, err := os.Stat(target)
			if err != nil {
				return fmt.Errorf("cannot stat symlink target %s: %w", target, err)
			}

			// Only links to regular files can be listed as files. The walk
			// does not descend into linked directories, so skip those.
			if !targetInfo.Mode().IsRegular() {
				return nil
			}

			seen[name] = struct{}{}
			out = append(out, NewFile(w.root, fs.FileInfoToDirEntry(targetInfo), name))

		default:
			// Skip unsupported file modes
		}

		return nil
	})
	if err != nil {
		return nil, err
	}
	return out, nil
}