mirror of https://github.com/databricks/cli.git
243 lines
5.4 KiB
Go
243 lines
5.4 KiB
Go
package sync
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"time"
|
|
|
|
"github.com/databricks/cli/libs/filer"
|
|
"github.com/databricks/cli/libs/fileset"
|
|
"github.com/databricks/cli/libs/git"
|
|
"github.com/databricks/cli/libs/log"
|
|
"github.com/databricks/cli/libs/set"
|
|
"github.com/databricks/databricks-sdk-go"
|
|
"github.com/databricks/databricks-sdk-go/service/iam"
|
|
)
|
|
|
|
type SyncOptions struct {
|
|
LocalPath string
|
|
RemotePath string
|
|
Include []string
|
|
Exclude []string
|
|
|
|
Full bool
|
|
|
|
SnapshotBasePath string
|
|
|
|
PollInterval time.Duration
|
|
|
|
WorkspaceClient *databricks.WorkspaceClient
|
|
|
|
CurrentUser *iam.User
|
|
|
|
Host string
|
|
}
|
|
|
|
type Sync struct {
|
|
*SyncOptions
|
|
|
|
fileSet *git.FileSet
|
|
includeFileSet *fileset.FileSet
|
|
excludeFileSet *fileset.FileSet
|
|
|
|
snapshot *Snapshot
|
|
filer filer.Filer
|
|
|
|
// Synchronization progress events are sent to this event notifier.
|
|
notifier EventNotifier
|
|
seq int
|
|
}
|
|
|
|
// New initializes and returns a new [Sync] instance.
|
|
func New(ctx context.Context, opts SyncOptions) (*Sync, error) {
|
|
fileSet, err := git.NewFileSet(opts.LocalPath)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
err = fileSet.EnsureValidGitIgnoreExists()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
includeFileSet, err := fileset.NewGlobSet(opts.LocalPath, opts.Include)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
excludeFileSet, err := fileset.NewGlobSet(opts.LocalPath, opts.Exclude)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Verify that the remote path we're about to synchronize to is valid and allowed.
|
|
err = EnsureRemotePathIsUsable(ctx, opts.WorkspaceClient, opts.RemotePath, opts.CurrentUser)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// TODO: The host may be late-initialized in certain Azure setups where we
|
|
// specify the workspace by its resource ID. tracked in: https://databricks.atlassian.net/browse/DECO-194
|
|
opts.Host = opts.WorkspaceClient.Config.Host
|
|
if opts.Host == "" {
|
|
return nil, fmt.Errorf("failed to resolve host for snapshot")
|
|
}
|
|
|
|
// For full sync, we start with an empty snapshot.
|
|
// For incremental sync, we try to load an existing snapshot to start from.
|
|
var snapshot *Snapshot
|
|
if opts.Full {
|
|
snapshot, err = newSnapshot(ctx, &opts)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("unable to instantiate new sync snapshot: %w", err)
|
|
}
|
|
} else {
|
|
snapshot, err = loadOrNewSnapshot(ctx, &opts)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("unable to load sync snapshot: %w", err)
|
|
}
|
|
}
|
|
|
|
filer, err := filer.NewWorkspaceFilesClient(opts.WorkspaceClient, opts.RemotePath)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return &Sync{
|
|
SyncOptions: &opts,
|
|
|
|
fileSet: fileSet,
|
|
includeFileSet: includeFileSet,
|
|
excludeFileSet: excludeFileSet,
|
|
snapshot: snapshot,
|
|
filer: filer,
|
|
notifier: &NopNotifier{},
|
|
seq: 0,
|
|
}, nil
|
|
}
|
|
|
|
func (s *Sync) Events() <-chan Event {
|
|
ch := make(chan Event, MaxRequestsInFlight)
|
|
s.notifier = &ChannelNotifier{ch}
|
|
return ch
|
|
}
|
|
|
|
func (s *Sync) Close() {
|
|
if s.notifier == nil {
|
|
return
|
|
}
|
|
s.notifier.Close()
|
|
s.notifier = nil
|
|
}
|
|
|
|
func (s *Sync) notifyStart(ctx context.Context, d diff) {
|
|
// If this is not the initial iteration we can ignore no-ops.
|
|
if s.seq > 0 && d.IsEmpty() {
|
|
return
|
|
}
|
|
s.notifier.Notify(ctx, newEventStart(s.seq, d.put, d.delete))
|
|
}
|
|
|
|
func (s *Sync) notifyProgress(ctx context.Context, action EventAction, path string, progress float32) {
|
|
s.notifier.Notify(ctx, newEventProgress(s.seq, action, path, progress))
|
|
}
|
|
|
|
func (s *Sync) notifyComplete(ctx context.Context, d diff) {
|
|
// If this is not the initial iteration we can ignore no-ops.
|
|
if s.seq > 0 && d.IsEmpty() {
|
|
return
|
|
}
|
|
s.notifier.Notify(ctx, newEventComplete(s.seq, d.put, d.delete))
|
|
s.seq++
|
|
}
|
|
|
|
func (s *Sync) RunOnce(ctx context.Context) error {
|
|
files, err := getFileList(ctx, s)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
change, err := s.snapshot.diff(ctx, files)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
s.notifyStart(ctx, change)
|
|
if change.IsEmpty() {
|
|
s.notifyComplete(ctx, change)
|
|
return nil
|
|
}
|
|
|
|
err = s.applyDiff(ctx, change)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
err = s.snapshot.Save(ctx)
|
|
if err != nil {
|
|
log.Errorf(ctx, "cannot store snapshot: %s", err)
|
|
return err
|
|
}
|
|
|
|
s.notifyComplete(ctx, change)
|
|
return nil
|
|
}
|
|
|
|
func getFileList(ctx context.Context, s *Sync) ([]fileset.File, error) {
|
|
// tradeoff: doing portable monitoring only due to macOS max descriptor manual ulimit setting requirement
|
|
// https://github.com/gorakhargosh/watchdog/blob/master/src/watchdog/observers/kqueue.py#L394-L418
|
|
all := set.NewSetF(func(f fileset.File) string {
|
|
return f.Absolute
|
|
})
|
|
gitFiles, err := s.fileSet.All()
|
|
if err != nil {
|
|
log.Errorf(ctx, "cannot list files: %s", err)
|
|
return nil, err
|
|
}
|
|
all.Add(gitFiles...)
|
|
|
|
include, err := s.includeFileSet.All()
|
|
if err != nil {
|
|
log.Errorf(ctx, "cannot list include files: %s", err)
|
|
return nil, err
|
|
}
|
|
|
|
all.Add(include...)
|
|
|
|
exclude, err := s.excludeFileSet.All()
|
|
if err != nil {
|
|
log.Errorf(ctx, "cannot list exclude files: %s", err)
|
|
return nil, err
|
|
}
|
|
|
|
for _, f := range exclude {
|
|
all.Remove(f)
|
|
}
|
|
|
|
return all.Iter(), nil
|
|
}
|
|
|
|
func (s *Sync) DestroySnapshot(ctx context.Context) error {
|
|
return s.snapshot.Destroy(ctx)
|
|
}
|
|
|
|
func (s *Sync) SnapshotPath() string {
|
|
return s.snapshot.SnapshotPath
|
|
}
|
|
|
|
func (s *Sync) RunContinuous(ctx context.Context) error {
|
|
ticker := time.NewTicker(s.PollInterval)
|
|
defer ticker.Stop()
|
|
|
|
for {
|
|
select {
|
|
case <-ctx.Done():
|
|
return ctx.Err()
|
|
case <-ticker.C:
|
|
err := s.RunOnce(ctx)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
}
|
|
}
|