2023-01-23 12:52:39 +00:00
|
|
|
package sync
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
2023-01-24 07:30:10 +00:00
|
|
|
"fmt"
|
2023-01-23 12:52:39 +00:00
|
|
|
"time"
|
|
|
|
|
2023-06-12 11:44:00 +00:00
|
|
|
"github.com/databricks/cli/libs/filer"
|
2023-05-16 16:35:39 +00:00
|
|
|
"github.com/databricks/cli/libs/git"
|
|
|
|
"github.com/databricks/cli/libs/log"
|
2023-01-23 12:52:39 +00:00
|
|
|
"github.com/databricks/databricks-sdk-go"
|
|
|
|
)
|
|
|
|
|
2023-01-24 07:30:10 +00:00
|
|
|
type SyncOptions struct {
|
2023-01-23 12:52:39 +00:00
|
|
|
LocalPath string
|
|
|
|
RemotePath string
|
|
|
|
|
2023-01-24 14:06:59 +00:00
|
|
|
Full bool
|
2023-01-23 12:52:39 +00:00
|
|
|
|
2023-01-24 07:30:10 +00:00
|
|
|
SnapshotBasePath string
|
|
|
|
|
2023-01-23 12:52:39 +00:00
|
|
|
PollInterval time.Duration
|
2023-01-24 07:30:10 +00:00
|
|
|
|
|
|
|
WorkspaceClient *databricks.WorkspaceClient
|
|
|
|
|
|
|
|
Host string
|
|
|
|
}
|
|
|
|
|
|
|
|
type Sync struct {
|
|
|
|
*SyncOptions
|
|
|
|
|
2023-06-12 11:44:00 +00:00
|
|
|
fileSet *git.FileSet
|
|
|
|
snapshot *Snapshot
|
|
|
|
filer filer.Filer
|
Add optional JSON output for sync command (#230)
JSON output makes it easy to process synchronization progress
information in downstream tools (e.g. the vscode extension).
This changes introduces a `sync.Event` interface type for progress events as
well as an `sync.EventNotifier` that lets the sync code pass along
progress events to calling code.
Example output in text mode (default, this uses the existing logger calls):
```text
2023/03/03 14:07:17 [INFO] Remote file sync location: /Repos/pieter.noordhuis@databricks.com/...
2023/03/03 14:07:18 [INFO] Initial Sync Complete
2023/03/03 14:07:22 [INFO] Action: PUT: foo
2023/03/03 14:07:23 [INFO] Uploaded foo
2023/03/03 14:07:23 [INFO] Complete
2023/03/03 14:07:25 [INFO] Action: DELETE: foo
2023/03/03 14:07:25 [INFO] Deleted foo
2023/03/03 14:07:25 [INFO] Complete
```
Example output in JSON mode:
```json
{"timestamp":"2023-03-03T14:08:15.459439+01:00","seq":0,"type":"start"}
{"timestamp":"2023-03-03T14:08:15.459461+01:00","seq":0,"type":"complete"}
{"timestamp":"2023-03-03T14:08:18.459821+01:00","seq":1,"type":"start","put":["foo"]}
{"timestamp":"2023-03-03T14:08:18.459867+01:00","seq":1,"type":"progress","action":"put","path":"foo","progress":0}
{"timestamp":"2023-03-03T14:08:19.418696+01:00","seq":1,"type":"progress","action":"put","path":"foo","progress":1}
{"timestamp":"2023-03-03T14:08:19.421397+01:00","seq":1,"type":"complete","put":["foo"]}
{"timestamp":"2023-03-03T14:08:22.459238+01:00","seq":2,"type":"start","delete":["foo"]}
{"timestamp":"2023-03-03T14:08:22.459268+01:00","seq":2,"type":"progress","action":"delete","path":"foo","progress":0}
{"timestamp":"2023-03-03T14:08:22.686413+01:00","seq":2,"type":"progress","action":"delete","path":"foo","progress":1}
{"timestamp":"2023-03-03T14:08:22.688989+01:00","seq":2,"type":"complete","delete":["foo"]}
```
---------
Co-authored-by: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com>
2023-03-08 09:27:19 +00:00
|
|
|
|
|
|
|
// Synchronization progress events are sent to this event notifier.
|
|
|
|
notifier EventNotifier
|
|
|
|
seq int
|
2023-01-24 07:30:10 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// New initializes and returns a new [Sync] instance.
|
2023-01-24 12:58:10 +00:00
|
|
|
func New(ctx context.Context, opts SyncOptions) (*Sync, error) {
|
2023-01-27 15:04:58 +00:00
|
|
|
fileSet, err := git.NewFileSet(opts.LocalPath)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2023-01-31 17:34:36 +00:00
|
|
|
err = fileSet.EnsureValidGitIgnoreExists()
|
2023-01-24 07:30:10 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2023-01-24 12:58:10 +00:00
|
|
|
// Verify that the remote path we're about to synchronize to is valid and allowed.
|
2023-02-20 13:34:48 +00:00
|
|
|
err = EnsureRemotePathIsUsable(ctx, opts.WorkspaceClient, opts.RemotePath)
|
2023-01-24 12:58:10 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2023-01-24 07:30:10 +00:00
|
|
|
// TODO: The host may be late-initialized in certain Azure setups where we
|
|
|
|
// specify the workspace by its resource ID. tracked in: https://databricks.atlassian.net/browse/DECO-194
|
|
|
|
opts.Host = opts.WorkspaceClient.Config.Host
|
|
|
|
if opts.Host == "" {
|
|
|
|
return nil, fmt.Errorf("failed to resolve host for snapshot")
|
|
|
|
}
|
|
|
|
|
2023-01-24 14:06:59 +00:00
|
|
|
// For full sync, we start with an empty snapshot.
|
|
|
|
// For incremental sync, we try to load an existing snapshot to start from.
|
|
|
|
var snapshot *Snapshot
|
|
|
|
if opts.Full {
|
2023-03-17 14:17:31 +00:00
|
|
|
snapshot, err = newSnapshot(ctx, &opts)
|
2023-01-24 14:06:59 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("unable to instantiate new sync snapshot: %w", err)
|
|
|
|
}
|
|
|
|
} else {
|
2023-03-17 14:17:31 +00:00
|
|
|
snapshot, err = loadOrNewSnapshot(ctx, &opts)
|
2023-01-24 14:06:59 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("unable to load sync snapshot: %w", err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-06-12 11:44:00 +00:00
|
|
|
filer, err := filer.NewWorkspaceFilesClient(opts.WorkspaceClient, opts.RemotePath)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2023-01-24 14:06:59 +00:00
|
|
|
|
2023-01-24 07:30:10 +00:00
|
|
|
return &Sync{
|
|
|
|
SyncOptions: &opts,
|
2023-01-24 14:06:59 +00:00
|
|
|
|
2023-06-12 11:44:00 +00:00
|
|
|
fileSet: fileSet,
|
|
|
|
snapshot: snapshot,
|
|
|
|
filer: filer,
|
|
|
|
notifier: &NopNotifier{},
|
|
|
|
seq: 0,
|
2023-01-24 07:30:10 +00:00
|
|
|
}, nil
|
2023-01-23 12:52:39 +00:00
|
|
|
}
|
|
|
|
|
Add optional JSON output for sync command (#230)
JSON output makes it easy to process synchronization progress
information in downstream tools (e.g. the vscode extension).
This change introduces a `sync.Event` interface type for progress events as
well as a `sync.EventNotifier` that lets the sync code pass along
progress events to calling code.
Example output in text mode (default, this uses the existing logger calls):
```text
2023/03/03 14:07:17 [INFO] Remote file sync location: /Repos/pieter.noordhuis@databricks.com/...
2023/03/03 14:07:18 [INFO] Initial Sync Complete
2023/03/03 14:07:22 [INFO] Action: PUT: foo
2023/03/03 14:07:23 [INFO] Uploaded foo
2023/03/03 14:07:23 [INFO] Complete
2023/03/03 14:07:25 [INFO] Action: DELETE: foo
2023/03/03 14:07:25 [INFO] Deleted foo
2023/03/03 14:07:25 [INFO] Complete
```
Example output in JSON mode:
```json
{"timestamp":"2023-03-03T14:08:15.459439+01:00","seq":0,"type":"start"}
{"timestamp":"2023-03-03T14:08:15.459461+01:00","seq":0,"type":"complete"}
{"timestamp":"2023-03-03T14:08:18.459821+01:00","seq":1,"type":"start","put":["foo"]}
{"timestamp":"2023-03-03T14:08:18.459867+01:00","seq":1,"type":"progress","action":"put","path":"foo","progress":0}
{"timestamp":"2023-03-03T14:08:19.418696+01:00","seq":1,"type":"progress","action":"put","path":"foo","progress":1}
{"timestamp":"2023-03-03T14:08:19.421397+01:00","seq":1,"type":"complete","put":["foo"]}
{"timestamp":"2023-03-03T14:08:22.459238+01:00","seq":2,"type":"start","delete":["foo"]}
{"timestamp":"2023-03-03T14:08:22.459268+01:00","seq":2,"type":"progress","action":"delete","path":"foo","progress":0}
{"timestamp":"2023-03-03T14:08:22.686413+01:00","seq":2,"type":"progress","action":"delete","path":"foo","progress":1}
{"timestamp":"2023-03-03T14:08:22.688989+01:00","seq":2,"type":"complete","delete":["foo"]}
```
---------
Co-authored-by: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com>
2023-03-08 09:27:19 +00:00
|
|
|
func (s *Sync) Events() <-chan Event {
|
|
|
|
ch := make(chan Event, MaxRequestsInFlight)
|
|
|
|
s.notifier = &ChannelNotifier{ch}
|
|
|
|
return ch
|
|
|
|
}
|
|
|
|
|
2023-03-16 16:48:17 +00:00
|
|
|
func (s *Sync) Close() {
|
|
|
|
if s.notifier == nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
s.notifier.Close()
|
|
|
|
s.notifier = nil
|
|
|
|
}
|
|
|
|
|
Add optional JSON output for sync command (#230)
JSON output makes it easy to process synchronization progress
information in downstream tools (e.g. the vscode extension).
This change introduces a `sync.Event` interface type for progress events as
well as a `sync.EventNotifier` that lets the sync code pass along
progress events to calling code.
Example output in text mode (default, this uses the existing logger calls):
```text
2023/03/03 14:07:17 [INFO] Remote file sync location: /Repos/pieter.noordhuis@databricks.com/...
2023/03/03 14:07:18 [INFO] Initial Sync Complete
2023/03/03 14:07:22 [INFO] Action: PUT: foo
2023/03/03 14:07:23 [INFO] Uploaded foo
2023/03/03 14:07:23 [INFO] Complete
2023/03/03 14:07:25 [INFO] Action: DELETE: foo
2023/03/03 14:07:25 [INFO] Deleted foo
2023/03/03 14:07:25 [INFO] Complete
```
Example output in JSON mode:
```json
{"timestamp":"2023-03-03T14:08:15.459439+01:00","seq":0,"type":"start"}
{"timestamp":"2023-03-03T14:08:15.459461+01:00","seq":0,"type":"complete"}
{"timestamp":"2023-03-03T14:08:18.459821+01:00","seq":1,"type":"start","put":["foo"]}
{"timestamp":"2023-03-03T14:08:18.459867+01:00","seq":1,"type":"progress","action":"put","path":"foo","progress":0}
{"timestamp":"2023-03-03T14:08:19.418696+01:00","seq":1,"type":"progress","action":"put","path":"foo","progress":1}
{"timestamp":"2023-03-03T14:08:19.421397+01:00","seq":1,"type":"complete","put":["foo"]}
{"timestamp":"2023-03-03T14:08:22.459238+01:00","seq":2,"type":"start","delete":["foo"]}
{"timestamp":"2023-03-03T14:08:22.459268+01:00","seq":2,"type":"progress","action":"delete","path":"foo","progress":0}
{"timestamp":"2023-03-03T14:08:22.686413+01:00","seq":2,"type":"progress","action":"delete","path":"foo","progress":1}
{"timestamp":"2023-03-03T14:08:22.688989+01:00","seq":2,"type":"complete","delete":["foo"]}
```
---------
Co-authored-by: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com>
2023-03-08 09:27:19 +00:00
|
|
|
func (s *Sync) notifyStart(ctx context.Context, d diff) {
|
|
|
|
// If this is not the initial iteration we can ignore no-ops.
|
|
|
|
if s.seq > 0 && d.IsEmpty() {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
s.notifier.Notify(ctx, newEventStart(s.seq, d.put, d.delete))
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *Sync) notifyProgress(ctx context.Context, action EventAction, path string, progress float32) {
|
|
|
|
s.notifier.Notify(ctx, newEventProgress(s.seq, action, path, progress))
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *Sync) notifyComplete(ctx context.Context, d diff) {
|
|
|
|
// If this is not the initial iteration we can ignore no-ops.
|
|
|
|
if s.seq > 0 && d.IsEmpty() {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
s.notifier.Notify(ctx, newEventComplete(s.seq, d.put, d.delete))
|
|
|
|
s.seq++
|
|
|
|
}
|
|
|
|
|
2023-01-24 14:06:59 +00:00
|
|
|
func (s *Sync) RunOnce(ctx context.Context) error {
|
|
|
|
// tradeoff: doing portable monitoring only due to macOS max descriptor manual ulimit setting requirement
|
|
|
|
// https://github.com/gorakhargosh/watchdog/blob/master/src/watchdog/observers/kqueue.py#L394-L418
|
|
|
|
all, err := s.fileSet.All()
|
|
|
|
if err != nil {
|
2023-03-17 14:17:31 +00:00
|
|
|
log.Errorf(ctx, "cannot list files: %s", err)
|
2023-01-24 14:06:59 +00:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2023-05-23 10:10:15 +00:00
|
|
|
change, err := s.snapshot.diff(ctx, all)
|
2023-01-24 14:06:59 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
Add optional JSON output for sync command (#230)
JSON output makes it easy to process synchronization progress
information in downstream tools (e.g. the vscode extension).
This changes introduces a `sync.Event` interface type for progress events as
well as an `sync.EventNotifier` that lets the sync code pass along
progress events to calling code.
Example output in text mode (default, this uses the existing logger calls):
```text
2023/03/03 14:07:17 [INFO] Remote file sync location: /Repos/pieter.noordhuis@databricks.com/...
2023/03/03 14:07:18 [INFO] Initial Sync Complete
2023/03/03 14:07:22 [INFO] Action: PUT: foo
2023/03/03 14:07:23 [INFO] Uploaded foo
2023/03/03 14:07:23 [INFO] Complete
2023/03/03 14:07:25 [INFO] Action: DELETE: foo
2023/03/03 14:07:25 [INFO] Deleted foo
2023/03/03 14:07:25 [INFO] Complete
```
Example output in JSON mode:
```json
{"timestamp":"2023-03-03T14:08:15.459439+01:00","seq":0,"type":"start"}
{"timestamp":"2023-03-03T14:08:15.459461+01:00","seq":0,"type":"complete"}
{"timestamp":"2023-03-03T14:08:18.459821+01:00","seq":1,"type":"start","put":["foo"]}
{"timestamp":"2023-03-03T14:08:18.459867+01:00","seq":1,"type":"progress","action":"put","path":"foo","progress":0}
{"timestamp":"2023-03-03T14:08:19.418696+01:00","seq":1,"type":"progress","action":"put","path":"foo","progress":1}
{"timestamp":"2023-03-03T14:08:19.421397+01:00","seq":1,"type":"complete","put":["foo"]}
{"timestamp":"2023-03-03T14:08:22.459238+01:00","seq":2,"type":"start","delete":["foo"]}
{"timestamp":"2023-03-03T14:08:22.459268+01:00","seq":2,"type":"progress","action":"delete","path":"foo","progress":0}
{"timestamp":"2023-03-03T14:08:22.686413+01:00","seq":2,"type":"progress","action":"delete","path":"foo","progress":1}
{"timestamp":"2023-03-03T14:08:22.688989+01:00","seq":2,"type":"complete","delete":["foo"]}
```
---------
Co-authored-by: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com>
2023-03-08 09:27:19 +00:00
|
|
|
|
|
|
|
s.notifyStart(ctx, change)
|
2023-01-24 14:06:59 +00:00
|
|
|
if change.IsEmpty() {
|
Add optional JSON output for sync command (#230)
JSON output makes it easy to process synchronization progress
information in downstream tools (e.g. the vscode extension).
This changes introduces a `sync.Event` interface type for progress events as
well as an `sync.EventNotifier` that lets the sync code pass along
progress events to calling code.
Example output in text mode (default, this uses the existing logger calls):
```text
2023/03/03 14:07:17 [INFO] Remote file sync location: /Repos/pieter.noordhuis@databricks.com/...
2023/03/03 14:07:18 [INFO] Initial Sync Complete
2023/03/03 14:07:22 [INFO] Action: PUT: foo
2023/03/03 14:07:23 [INFO] Uploaded foo
2023/03/03 14:07:23 [INFO] Complete
2023/03/03 14:07:25 [INFO] Action: DELETE: foo
2023/03/03 14:07:25 [INFO] Deleted foo
2023/03/03 14:07:25 [INFO] Complete
```
Example output in JSON mode:
```json
{"timestamp":"2023-03-03T14:08:15.459439+01:00","seq":0,"type":"start"}
{"timestamp":"2023-03-03T14:08:15.459461+01:00","seq":0,"type":"complete"}
{"timestamp":"2023-03-03T14:08:18.459821+01:00","seq":1,"type":"start","put":["foo"]}
{"timestamp":"2023-03-03T14:08:18.459867+01:00","seq":1,"type":"progress","action":"put","path":"foo","progress":0}
{"timestamp":"2023-03-03T14:08:19.418696+01:00","seq":1,"type":"progress","action":"put","path":"foo","progress":1}
{"timestamp":"2023-03-03T14:08:19.421397+01:00","seq":1,"type":"complete","put":["foo"]}
{"timestamp":"2023-03-03T14:08:22.459238+01:00","seq":2,"type":"start","delete":["foo"]}
{"timestamp":"2023-03-03T14:08:22.459268+01:00","seq":2,"type":"progress","action":"delete","path":"foo","progress":0}
{"timestamp":"2023-03-03T14:08:22.686413+01:00","seq":2,"type":"progress","action":"delete","path":"foo","progress":1}
{"timestamp":"2023-03-03T14:08:22.688989+01:00","seq":2,"type":"complete","delete":["foo"]}
```
---------
Co-authored-by: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com>
2023-03-08 09:27:19 +00:00
|
|
|
s.notifyComplete(ctx, change)
|
2023-01-24 14:06:59 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2023-03-09 12:29:05 +00:00
|
|
|
err = s.applyDiff(ctx, change)
|
2023-01-24 14:06:59 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
err = s.snapshot.Save(ctx)
|
|
|
|
if err != nil {
|
2023-03-17 14:17:31 +00:00
|
|
|
log.Errorf(ctx, "cannot store snapshot: %s", err)
|
2023-01-24 14:06:59 +00:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
Add optional JSON output for sync command (#230)
JSON output makes it easy to process synchronization progress
information in downstream tools (e.g. the vscode extension).
This changes introduces a `sync.Event` interface type for progress events as
well as an `sync.EventNotifier` that lets the sync code pass along
progress events to calling code.
Example output in text mode (default, this uses the existing logger calls):
```text
2023/03/03 14:07:17 [INFO] Remote file sync location: /Repos/pieter.noordhuis@databricks.com/...
2023/03/03 14:07:18 [INFO] Initial Sync Complete
2023/03/03 14:07:22 [INFO] Action: PUT: foo
2023/03/03 14:07:23 [INFO] Uploaded foo
2023/03/03 14:07:23 [INFO] Complete
2023/03/03 14:07:25 [INFO] Action: DELETE: foo
2023/03/03 14:07:25 [INFO] Deleted foo
2023/03/03 14:07:25 [INFO] Complete
```
Example output in JSON mode:
```json
{"timestamp":"2023-03-03T14:08:15.459439+01:00","seq":0,"type":"start"}
{"timestamp":"2023-03-03T14:08:15.459461+01:00","seq":0,"type":"complete"}
{"timestamp":"2023-03-03T14:08:18.459821+01:00","seq":1,"type":"start","put":["foo"]}
{"timestamp":"2023-03-03T14:08:18.459867+01:00","seq":1,"type":"progress","action":"put","path":"foo","progress":0}
{"timestamp":"2023-03-03T14:08:19.418696+01:00","seq":1,"type":"progress","action":"put","path":"foo","progress":1}
{"timestamp":"2023-03-03T14:08:19.421397+01:00","seq":1,"type":"complete","put":["foo"]}
{"timestamp":"2023-03-03T14:08:22.459238+01:00","seq":2,"type":"start","delete":["foo"]}
{"timestamp":"2023-03-03T14:08:22.459268+01:00","seq":2,"type":"progress","action":"delete","path":"foo","progress":0}
{"timestamp":"2023-03-03T14:08:22.686413+01:00","seq":2,"type":"progress","action":"delete","path":"foo","progress":1}
{"timestamp":"2023-03-03T14:08:22.688989+01:00","seq":2,"type":"complete","delete":["foo"]}
```
---------
Co-authored-by: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com>
2023-03-08 09:27:19 +00:00
|
|
|
s.notifyComplete(ctx, change)
|
2023-01-24 14:06:59 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2023-04-11 14:57:01 +00:00
|
|
|
func (s *Sync) DestroySnapshot(ctx context.Context) error {
|
|
|
|
return s.snapshot.Destroy(ctx)
|
|
|
|
}
|
|
|
|
|
2023-04-18 14:55:06 +00:00
|
|
|
func (s *Sync) SnapshotPath() string {
|
|
|
|
return s.snapshot.SnapshotPath
|
|
|
|
}
|
|
|
|
|
2023-01-24 14:06:59 +00:00
|
|
|
func (s *Sync) RunContinuous(ctx context.Context) error {
|
|
|
|
ticker := time.NewTicker(s.PollInterval)
|
|
|
|
defer ticker.Stop()
|
|
|
|
|
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
|
|
|
return ctx.Err()
|
|
|
|
case <-ticker.C:
|
|
|
|
err := s.RunOnce(ctx)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2023-01-23 12:52:39 +00:00
|
|
|
}
|