2023-01-23 12:52:39 +00:00
|
|
|
package sync
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
2023-01-24 07:30:10 +00:00
|
|
|
"fmt"
|
2023-01-24 14:06:59 +00:00
|
|
|
"log"
|
|
|
|
"sync"
|
2023-01-23 12:52:39 +00:00
|
|
|
"time"
|
|
|
|
|
2023-01-31 18:19:16 +00:00
|
|
|
"github.com/databricks/bricks/libs/git"
|
2023-01-23 12:52:39 +00:00
|
|
|
"github.com/databricks/bricks/libs/sync/repofiles"
|
|
|
|
"github.com/databricks/databricks-sdk-go"
|
|
|
|
)
|
|
|
|
|
2023-01-24 07:30:10 +00:00
|
|
|
type SyncOptions struct {
|
2023-01-23 12:52:39 +00:00
|
|
|
LocalPath string
|
|
|
|
RemotePath string
|
|
|
|
|
2023-01-24 14:06:59 +00:00
|
|
|
Full bool
|
2023-01-23 12:52:39 +00:00
|
|
|
|
2023-01-24 07:30:10 +00:00
|
|
|
SnapshotBasePath string
|
|
|
|
|
2023-01-23 12:52:39 +00:00
|
|
|
PollInterval time.Duration
|
2023-01-24 07:30:10 +00:00
|
|
|
|
|
|
|
WorkspaceClient *databricks.WorkspaceClient
|
|
|
|
|
|
|
|
Host string
|
|
|
|
}
|
|
|
|
|
|
|
|
type Sync struct {
|
|
|
|
*SyncOptions
|
|
|
|
|
2023-01-31 17:34:36 +00:00
|
|
|
fileSet *git.FileSet
|
2023-01-24 14:06:59 +00:00
|
|
|
snapshot *Snapshot
|
|
|
|
repoFiles *repofiles.RepoFiles
|
2023-01-24 07:30:10 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// New initializes and returns a new [Sync] instance.
|
2023-01-24 12:58:10 +00:00
|
|
|
func New(ctx context.Context, opts SyncOptions) (*Sync, error) {
|
2023-01-27 15:04:58 +00:00
|
|
|
fileSet, err := git.NewFileSet(opts.LocalPath)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2023-01-31 17:34:36 +00:00
|
|
|
err = fileSet.EnsureValidGitIgnoreExists()
|
2023-01-24 07:30:10 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2023-01-24 12:58:10 +00:00
|
|
|
// Verify that the remote path we're about to synchronize to is valid and allowed.
|
|
|
|
err = ensureRemotePathIsUsable(ctx, opts.WorkspaceClient, opts.RemotePath)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2023-01-24 07:30:10 +00:00
|
|
|
// TODO: The host may be late-initialized in certain Azure setups where we
|
|
|
|
// specify the workspace by its resource ID. tracked in: https://databricks.atlassian.net/browse/DECO-194
|
|
|
|
opts.Host = opts.WorkspaceClient.Config.Host
|
|
|
|
if opts.Host == "" {
|
|
|
|
return nil, fmt.Errorf("failed to resolve host for snapshot")
|
|
|
|
}
|
|
|
|
|
2023-01-24 14:06:59 +00:00
|
|
|
// For full sync, we start with an empty snapshot.
|
|
|
|
// For incremental sync, we try to load an existing snapshot to start from.
|
|
|
|
var snapshot *Snapshot
|
|
|
|
if opts.Full {
|
|
|
|
snapshot, err = newSnapshot(&opts)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("unable to instantiate new sync snapshot: %w", err)
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
snapshot, err = loadOrNewSnapshot(&opts)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("unable to load sync snapshot: %w", err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
repoFiles := repofiles.Create(opts.RemotePath, opts.LocalPath, opts.WorkspaceClient)
|
|
|
|
|
2023-01-24 07:30:10 +00:00
|
|
|
return &Sync{
|
|
|
|
SyncOptions: &opts,
|
2023-01-24 14:06:59 +00:00
|
|
|
|
|
|
|
fileSet: fileSet,
|
|
|
|
snapshot: snapshot,
|
|
|
|
repoFiles: repoFiles,
|
2023-01-24 07:30:10 +00:00
|
|
|
}, nil
|
2023-01-23 12:52:39 +00:00
|
|
|
}
|
|
|
|
|
2023-01-24 14:06:59 +00:00
|
|
|
func (s *Sync) RunOnce(ctx context.Context) error {
|
2023-01-24 07:30:10 +00:00
|
|
|
repoFiles := repofiles.Create(s.RemotePath, s.LocalPath, s.WorkspaceClient)
|
2023-01-24 14:06:59 +00:00
|
|
|
applyDiff := syncCallback(ctx, repoFiles)
|
|
|
|
|
|
|
|
// tradeoff: doing portable monitoring only due to macOS max descriptor manual ulimit setting requirement
|
|
|
|
// https://github.com/gorakhargosh/watchdog/blob/master/src/watchdog/observers/kqueue.py#L394-L418
|
|
|
|
all, err := s.fileSet.All()
|
|
|
|
if err != nil {
|
|
|
|
log.Printf("[ERROR] cannot list files: %s", err)
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
change, err := s.snapshot.diff(all)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if change.IsEmpty() {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
log.Printf("[INFO] Action: %v", change)
|
|
|
|
err = applyDiff(change)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
err = s.snapshot.Save(ctx)
|
|
|
|
if err != nil {
|
|
|
|
log.Printf("[ERROR] cannot store snapshot: %s", err)
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *Sync) RunContinuous(ctx context.Context) error {
|
|
|
|
var once sync.Once
|
|
|
|
|
|
|
|
ticker := time.NewTicker(s.PollInterval)
|
|
|
|
defer ticker.Stop()
|
|
|
|
|
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
|
|
|
return ctx.Err()
|
|
|
|
case <-ticker.C:
|
|
|
|
err := s.RunOnce(ctx)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
once.Do(func() {
|
|
|
|
log.Printf("[INFO] Initial Sync Complete")
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
2023-01-23 12:52:39 +00:00
|
|
|
}
|