databricks-cli/libs/sync/sync.go

187 lines
4.2 KiB
Go
Raw Normal View History

package sync
import (
"context"
"fmt"
"time"
"github.com/databricks/bricks/libs/git"
"github.com/databricks/bricks/libs/log"
"github.com/databricks/bricks/libs/sync/repofiles"
"github.com/databricks/databricks-sdk-go"
)
type SyncOptions struct {
LocalPath string
RemotePath string
Full bool
SnapshotBasePath string
PollInterval time.Duration
WorkspaceClient *databricks.WorkspaceClient
Host string
}
type Sync struct {
*SyncOptions
fileSet *git.FileSet
snapshot *Snapshot
repoFiles *repofiles.RepoFiles
Add optional JSON output for sync command (#230) JSON output makes it easy to process synchronization progress information in downstream tools (e.g. the vscode extension). This changes introduces a `sync.Event` interface type for progress events as well as an `sync.EventNotifier` that lets the sync code pass along progress events to calling code. Example output in text mode (default, this uses the existing logger calls): ```text 2023/03/03 14:07:17 [INFO] Remote file sync location: /Repos/pieter.noordhuis@databricks.com/... 2023/03/03 14:07:18 [INFO] Initial Sync Complete 2023/03/03 14:07:22 [INFO] Action: PUT: foo 2023/03/03 14:07:23 [INFO] Uploaded foo 2023/03/03 14:07:23 [INFO] Complete 2023/03/03 14:07:25 [INFO] Action: DELETE: foo 2023/03/03 14:07:25 [INFO] Deleted foo 2023/03/03 14:07:25 [INFO] Complete ``` Example output in JSON mode: ```json {"timestamp":"2023-03-03T14:08:15.459439+01:00","seq":0,"type":"start"} {"timestamp":"2023-03-03T14:08:15.459461+01:00","seq":0,"type":"complete"} {"timestamp":"2023-03-03T14:08:18.459821+01:00","seq":1,"type":"start","put":["foo"]} {"timestamp":"2023-03-03T14:08:18.459867+01:00","seq":1,"type":"progress","action":"put","path":"foo","progress":0} {"timestamp":"2023-03-03T14:08:19.418696+01:00","seq":1,"type":"progress","action":"put","path":"foo","progress":1} {"timestamp":"2023-03-03T14:08:19.421397+01:00","seq":1,"type":"complete","put":["foo"]} {"timestamp":"2023-03-03T14:08:22.459238+01:00","seq":2,"type":"start","delete":["foo"]} {"timestamp":"2023-03-03T14:08:22.459268+01:00","seq":2,"type":"progress","action":"delete","path":"foo","progress":0} {"timestamp":"2023-03-03T14:08:22.686413+01:00","seq":2,"type":"progress","action":"delete","path":"foo","progress":1} {"timestamp":"2023-03-03T14:08:22.688989+01:00","seq":2,"type":"complete","delete":["foo"]} ``` --------- Co-authored-by: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com>
2023-03-08 09:27:19 +00:00
// Synchronization progress events are sent to this event notifier.
notifier EventNotifier
seq int
}
// New initializes and returns a new [Sync] instance.
func New(ctx context.Context, opts SyncOptions) (*Sync, error) {
fileSet, err := git.NewFileSet(opts.LocalPath)
if err != nil {
return nil, err
}
err = fileSet.EnsureValidGitIgnoreExists()
if err != nil {
return nil, err
}
// Verify that the remote path we're about to synchronize to is valid and allowed.
err = EnsureRemotePathIsUsable(ctx, opts.WorkspaceClient, opts.RemotePath)
if err != nil {
return nil, err
}
// TODO: The host may be late-initialized in certain Azure setups where we
// specify the workspace by its resource ID. tracked in: https://databricks.atlassian.net/browse/DECO-194
opts.Host = opts.WorkspaceClient.Config.Host
if opts.Host == "" {
return nil, fmt.Errorf("failed to resolve host for snapshot")
}
// For full sync, we start with an empty snapshot.
// For incremental sync, we try to load an existing snapshot to start from.
var snapshot *Snapshot
if opts.Full {
snapshot, err = newSnapshot(ctx, &opts)
if err != nil {
return nil, fmt.Errorf("unable to instantiate new sync snapshot: %w", err)
}
} else {
snapshot, err = loadOrNewSnapshot(ctx, &opts)
if err != nil {
return nil, fmt.Errorf("unable to load sync snapshot: %w", err)
}
}
repoFiles := repofiles.Create(opts.RemotePath, opts.LocalPath, opts.WorkspaceClient)
return &Sync{
SyncOptions: &opts,
fileSet: fileSet,
snapshot: snapshot,
repoFiles: repoFiles,
Add optional JSON output for sync command (#230) JSON output makes it easy to process synchronization progress information in downstream tools (e.g. the vscode extension). This changes introduces a `sync.Event` interface type for progress events as well as an `sync.EventNotifier` that lets the sync code pass along progress events to calling code. Example output in text mode (default, this uses the existing logger calls): ```text 2023/03/03 14:07:17 [INFO] Remote file sync location: /Repos/pieter.noordhuis@databricks.com/... 2023/03/03 14:07:18 [INFO] Initial Sync Complete 2023/03/03 14:07:22 [INFO] Action: PUT: foo 2023/03/03 14:07:23 [INFO] Uploaded foo 2023/03/03 14:07:23 [INFO] Complete 2023/03/03 14:07:25 [INFO] Action: DELETE: foo 2023/03/03 14:07:25 [INFO] Deleted foo 2023/03/03 14:07:25 [INFO] Complete ``` Example output in JSON mode: ```json {"timestamp":"2023-03-03T14:08:15.459439+01:00","seq":0,"type":"start"} {"timestamp":"2023-03-03T14:08:15.459461+01:00","seq":0,"type":"complete"} {"timestamp":"2023-03-03T14:08:18.459821+01:00","seq":1,"type":"start","put":["foo"]} {"timestamp":"2023-03-03T14:08:18.459867+01:00","seq":1,"type":"progress","action":"put","path":"foo","progress":0} {"timestamp":"2023-03-03T14:08:19.418696+01:00","seq":1,"type":"progress","action":"put","path":"foo","progress":1} {"timestamp":"2023-03-03T14:08:19.421397+01:00","seq":1,"type":"complete","put":["foo"]} {"timestamp":"2023-03-03T14:08:22.459238+01:00","seq":2,"type":"start","delete":["foo"]} {"timestamp":"2023-03-03T14:08:22.459268+01:00","seq":2,"type":"progress","action":"delete","path":"foo","progress":0} {"timestamp":"2023-03-03T14:08:22.686413+01:00","seq":2,"type":"progress","action":"delete","path":"foo","progress":1} {"timestamp":"2023-03-03T14:08:22.688989+01:00","seq":2,"type":"complete","delete":["foo"]} ``` --------- Co-authored-by: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com>
2023-03-08 09:27:19 +00:00
notifier: &NopNotifier{},
seq: 0,
}, nil
}
Add optional JSON output for sync command (#230) JSON output makes it easy to process synchronization progress information in downstream tools (e.g. the vscode extension). This changes introduces a `sync.Event` interface type for progress events as well as an `sync.EventNotifier` that lets the sync code pass along progress events to calling code. Example output in text mode (default, this uses the existing logger calls): ```text 2023/03/03 14:07:17 [INFO] Remote file sync location: /Repos/pieter.noordhuis@databricks.com/... 2023/03/03 14:07:18 [INFO] Initial Sync Complete 2023/03/03 14:07:22 [INFO] Action: PUT: foo 2023/03/03 14:07:23 [INFO] Uploaded foo 2023/03/03 14:07:23 [INFO] Complete 2023/03/03 14:07:25 [INFO] Action: DELETE: foo 2023/03/03 14:07:25 [INFO] Deleted foo 2023/03/03 14:07:25 [INFO] Complete ``` Example output in JSON mode: ```json {"timestamp":"2023-03-03T14:08:15.459439+01:00","seq":0,"type":"start"} {"timestamp":"2023-03-03T14:08:15.459461+01:00","seq":0,"type":"complete"} {"timestamp":"2023-03-03T14:08:18.459821+01:00","seq":1,"type":"start","put":["foo"]} {"timestamp":"2023-03-03T14:08:18.459867+01:00","seq":1,"type":"progress","action":"put","path":"foo","progress":0} {"timestamp":"2023-03-03T14:08:19.418696+01:00","seq":1,"type":"progress","action":"put","path":"foo","progress":1} {"timestamp":"2023-03-03T14:08:19.421397+01:00","seq":1,"type":"complete","put":["foo"]} {"timestamp":"2023-03-03T14:08:22.459238+01:00","seq":2,"type":"start","delete":["foo"]} {"timestamp":"2023-03-03T14:08:22.459268+01:00","seq":2,"type":"progress","action":"delete","path":"foo","progress":0} {"timestamp":"2023-03-03T14:08:22.686413+01:00","seq":2,"type":"progress","action":"delete","path":"foo","progress":1} {"timestamp":"2023-03-03T14:08:22.688989+01:00","seq":2,"type":"complete","delete":["foo"]} ``` --------- Co-authored-by: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com>
2023-03-08 09:27:19 +00:00
func (s *Sync) Events() <-chan Event {
ch := make(chan Event, MaxRequestsInFlight)
s.notifier = &ChannelNotifier{ch}
return ch
}
func (s *Sync) Close() {
if s.notifier == nil {
return
}
s.notifier.Close()
s.notifier = nil
}
Add optional JSON output for sync command (#230) JSON output makes it easy to process synchronization progress information in downstream tools (e.g. the vscode extension). This changes introduces a `sync.Event` interface type for progress events as well as an `sync.EventNotifier` that lets the sync code pass along progress events to calling code. Example output in text mode (default, this uses the existing logger calls): ```text 2023/03/03 14:07:17 [INFO] Remote file sync location: /Repos/pieter.noordhuis@databricks.com/... 2023/03/03 14:07:18 [INFO] Initial Sync Complete 2023/03/03 14:07:22 [INFO] Action: PUT: foo 2023/03/03 14:07:23 [INFO] Uploaded foo 2023/03/03 14:07:23 [INFO] Complete 2023/03/03 14:07:25 [INFO] Action: DELETE: foo 2023/03/03 14:07:25 [INFO] Deleted foo 2023/03/03 14:07:25 [INFO] Complete ``` Example output in JSON mode: ```json {"timestamp":"2023-03-03T14:08:15.459439+01:00","seq":0,"type":"start"} {"timestamp":"2023-03-03T14:08:15.459461+01:00","seq":0,"type":"complete"} {"timestamp":"2023-03-03T14:08:18.459821+01:00","seq":1,"type":"start","put":["foo"]} {"timestamp":"2023-03-03T14:08:18.459867+01:00","seq":1,"type":"progress","action":"put","path":"foo","progress":0} {"timestamp":"2023-03-03T14:08:19.418696+01:00","seq":1,"type":"progress","action":"put","path":"foo","progress":1} {"timestamp":"2023-03-03T14:08:19.421397+01:00","seq":1,"type":"complete","put":["foo"]} {"timestamp":"2023-03-03T14:08:22.459238+01:00","seq":2,"type":"start","delete":["foo"]} {"timestamp":"2023-03-03T14:08:22.459268+01:00","seq":2,"type":"progress","action":"delete","path":"foo","progress":0} {"timestamp":"2023-03-03T14:08:22.686413+01:00","seq":2,"type":"progress","action":"delete","path":"foo","progress":1} {"timestamp":"2023-03-03T14:08:22.688989+01:00","seq":2,"type":"complete","delete":["foo"]} ``` --------- Co-authored-by: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com>
2023-03-08 09:27:19 +00:00
func (s *Sync) notifyStart(ctx context.Context, d diff) {
// If this is not the initial iteration we can ignore no-ops.
if s.seq > 0 && d.IsEmpty() {
return
}
s.notifier.Notify(ctx, newEventStart(s.seq, d.put, d.delete))
}
func (s *Sync) notifyProgress(ctx context.Context, action EventAction, path string, progress float32) {
s.notifier.Notify(ctx, newEventProgress(s.seq, action, path, progress))
}
func (s *Sync) notifyComplete(ctx context.Context, d diff) {
// If this is not the initial iteration we can ignore no-ops.
if s.seq > 0 && d.IsEmpty() {
return
}
s.notifier.Notify(ctx, newEventComplete(s.seq, d.put, d.delete))
s.seq++
}
func (s *Sync) RunOnce(ctx context.Context) error {
// tradeoff: doing portable monitoring only due to macOS max descriptor manual ulimit setting requirement
// https://github.com/gorakhargosh/watchdog/blob/master/src/watchdog/observers/kqueue.py#L394-L418
all, err := s.fileSet.All()
if err != nil {
log.Errorf(ctx, "cannot list files: %s", err)
return err
}
change, err := s.snapshot.diff(all)
if err != nil {
return err
}
Add optional JSON output for sync command (#230) JSON output makes it easy to process synchronization progress information in downstream tools (e.g. the vscode extension). This changes introduces a `sync.Event` interface type for progress events as well as an `sync.EventNotifier` that lets the sync code pass along progress events to calling code. Example output in text mode (default, this uses the existing logger calls): ```text 2023/03/03 14:07:17 [INFO] Remote file sync location: /Repos/pieter.noordhuis@databricks.com/... 2023/03/03 14:07:18 [INFO] Initial Sync Complete 2023/03/03 14:07:22 [INFO] Action: PUT: foo 2023/03/03 14:07:23 [INFO] Uploaded foo 2023/03/03 14:07:23 [INFO] Complete 2023/03/03 14:07:25 [INFO] Action: DELETE: foo 2023/03/03 14:07:25 [INFO] Deleted foo 2023/03/03 14:07:25 [INFO] Complete ``` Example output in JSON mode: ```json {"timestamp":"2023-03-03T14:08:15.459439+01:00","seq":0,"type":"start"} {"timestamp":"2023-03-03T14:08:15.459461+01:00","seq":0,"type":"complete"} {"timestamp":"2023-03-03T14:08:18.459821+01:00","seq":1,"type":"start","put":["foo"]} {"timestamp":"2023-03-03T14:08:18.459867+01:00","seq":1,"type":"progress","action":"put","path":"foo","progress":0} {"timestamp":"2023-03-03T14:08:19.418696+01:00","seq":1,"type":"progress","action":"put","path":"foo","progress":1} {"timestamp":"2023-03-03T14:08:19.421397+01:00","seq":1,"type":"complete","put":["foo"]} {"timestamp":"2023-03-03T14:08:22.459238+01:00","seq":2,"type":"start","delete":["foo"]} {"timestamp":"2023-03-03T14:08:22.459268+01:00","seq":2,"type":"progress","action":"delete","path":"foo","progress":0} {"timestamp":"2023-03-03T14:08:22.686413+01:00","seq":2,"type":"progress","action":"delete","path":"foo","progress":1} {"timestamp":"2023-03-03T14:08:22.688989+01:00","seq":2,"type":"complete","delete":["foo"]} ``` --------- Co-authored-by: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com>
2023-03-08 09:27:19 +00:00
s.notifyStart(ctx, change)
if change.IsEmpty() {
Add optional JSON output for sync command (#230) JSON output makes it easy to process synchronization progress information in downstream tools (e.g. the vscode extension). This changes introduces a `sync.Event` interface type for progress events as well as an `sync.EventNotifier` that lets the sync code pass along progress events to calling code. Example output in text mode (default, this uses the existing logger calls): ```text 2023/03/03 14:07:17 [INFO] Remote file sync location: /Repos/pieter.noordhuis@databricks.com/... 2023/03/03 14:07:18 [INFO] Initial Sync Complete 2023/03/03 14:07:22 [INFO] Action: PUT: foo 2023/03/03 14:07:23 [INFO] Uploaded foo 2023/03/03 14:07:23 [INFO] Complete 2023/03/03 14:07:25 [INFO] Action: DELETE: foo 2023/03/03 14:07:25 [INFO] Deleted foo 2023/03/03 14:07:25 [INFO] Complete ``` Example output in JSON mode: ```json {"timestamp":"2023-03-03T14:08:15.459439+01:00","seq":0,"type":"start"} {"timestamp":"2023-03-03T14:08:15.459461+01:00","seq":0,"type":"complete"} {"timestamp":"2023-03-03T14:08:18.459821+01:00","seq":1,"type":"start","put":["foo"]} {"timestamp":"2023-03-03T14:08:18.459867+01:00","seq":1,"type":"progress","action":"put","path":"foo","progress":0} {"timestamp":"2023-03-03T14:08:19.418696+01:00","seq":1,"type":"progress","action":"put","path":"foo","progress":1} {"timestamp":"2023-03-03T14:08:19.421397+01:00","seq":1,"type":"complete","put":["foo"]} {"timestamp":"2023-03-03T14:08:22.459238+01:00","seq":2,"type":"start","delete":["foo"]} {"timestamp":"2023-03-03T14:08:22.459268+01:00","seq":2,"type":"progress","action":"delete","path":"foo","progress":0} {"timestamp":"2023-03-03T14:08:22.686413+01:00","seq":2,"type":"progress","action":"delete","path":"foo","progress":1} {"timestamp":"2023-03-03T14:08:22.688989+01:00","seq":2,"type":"complete","delete":["foo"]} ``` --------- Co-authored-by: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com>
2023-03-08 09:27:19 +00:00
s.notifyComplete(ctx, change)
return nil
}
err = s.applyDiff(ctx, change)
if err != nil {
return err
}
err = s.snapshot.Save(ctx)
if err != nil {
log.Errorf(ctx, "cannot store snapshot: %s", err)
return err
}
Add optional JSON output for sync command (#230) JSON output makes it easy to process synchronization progress information in downstream tools (e.g. the vscode extension). This changes introduces a `sync.Event` interface type for progress events as well as an `sync.EventNotifier` that lets the sync code pass along progress events to calling code. Example output in text mode (default, this uses the existing logger calls): ```text 2023/03/03 14:07:17 [INFO] Remote file sync location: /Repos/pieter.noordhuis@databricks.com/... 2023/03/03 14:07:18 [INFO] Initial Sync Complete 2023/03/03 14:07:22 [INFO] Action: PUT: foo 2023/03/03 14:07:23 [INFO] Uploaded foo 2023/03/03 14:07:23 [INFO] Complete 2023/03/03 14:07:25 [INFO] Action: DELETE: foo 2023/03/03 14:07:25 [INFO] Deleted foo 2023/03/03 14:07:25 [INFO] Complete ``` Example output in JSON mode: ```json {"timestamp":"2023-03-03T14:08:15.459439+01:00","seq":0,"type":"start"} {"timestamp":"2023-03-03T14:08:15.459461+01:00","seq":0,"type":"complete"} {"timestamp":"2023-03-03T14:08:18.459821+01:00","seq":1,"type":"start","put":["foo"]} {"timestamp":"2023-03-03T14:08:18.459867+01:00","seq":1,"type":"progress","action":"put","path":"foo","progress":0} {"timestamp":"2023-03-03T14:08:19.418696+01:00","seq":1,"type":"progress","action":"put","path":"foo","progress":1} {"timestamp":"2023-03-03T14:08:19.421397+01:00","seq":1,"type":"complete","put":["foo"]} {"timestamp":"2023-03-03T14:08:22.459238+01:00","seq":2,"type":"start","delete":["foo"]} {"timestamp":"2023-03-03T14:08:22.459268+01:00","seq":2,"type":"progress","action":"delete","path":"foo","progress":0} {"timestamp":"2023-03-03T14:08:22.686413+01:00","seq":2,"type":"progress","action":"delete","path":"foo","progress":1} {"timestamp":"2023-03-03T14:08:22.688989+01:00","seq":2,"type":"complete","delete":["foo"]} ``` --------- Co-authored-by: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com>
2023-03-08 09:27:19 +00:00
s.notifyComplete(ctx, change)
return nil
}
func (s *Sync) DestroySnapshot(ctx context.Context) error {
return s.snapshot.Destroy(ctx)
}
func (s *Sync) SnapshotPath() string {
return s.snapshot.SnapshotPath
}
func (s *Sync) RunContinuous(ctx context.Context) error {
ticker := time.NewTicker(s.PollInterval)
defer ticker.Stop()
for {
select {
case <-ctx.Done():
return ctx.Err()
case <-ticker.C:
err := s.RunOnce(ctx)
if err != nil {
return err
}
}
}
}