databricks-cli/cmd/sync/sync.go

170 lines
4.4 KiB
Go
Raw Normal View History

2022-07-07 18:56:59 +00:00
package sync
import (
"context"
"flag"
2022-07-07 18:56:59 +00:00
"fmt"
"io"
"path/filepath"
stdsync "sync"
2022-07-07 18:56:59 +00:00
"time"
"github.com/databricks/bricks/bundle"
2022-07-07 18:56:59 +00:00
"github.com/databricks/bricks/cmd/root"
Add optional JSON output for sync command (#230) JSON output makes it easy to process synchronization progress information in downstream tools (e.g. the vscode extension). This changes introduces a `sync.Event` interface type for progress events as well as an `sync.EventNotifier` that lets the sync code pass along progress events to calling code. Example output in text mode (default, this uses the existing logger calls): ```text 2023/03/03 14:07:17 [INFO] Remote file sync location: /Repos/pieter.noordhuis@databricks.com/... 2023/03/03 14:07:18 [INFO] Initial Sync Complete 2023/03/03 14:07:22 [INFO] Action: PUT: foo 2023/03/03 14:07:23 [INFO] Uploaded foo 2023/03/03 14:07:23 [INFO] Complete 2023/03/03 14:07:25 [INFO] Action: DELETE: foo 2023/03/03 14:07:25 [INFO] Deleted foo 2023/03/03 14:07:25 [INFO] Complete ``` Example output in JSON mode: ```json {"timestamp":"2023-03-03T14:08:15.459439+01:00","seq":0,"type":"start"} {"timestamp":"2023-03-03T14:08:15.459461+01:00","seq":0,"type":"complete"} {"timestamp":"2023-03-03T14:08:18.459821+01:00","seq":1,"type":"start","put":["foo"]} {"timestamp":"2023-03-03T14:08:18.459867+01:00","seq":1,"type":"progress","action":"put","path":"foo","progress":0} {"timestamp":"2023-03-03T14:08:19.418696+01:00","seq":1,"type":"progress","action":"put","path":"foo","progress":1} {"timestamp":"2023-03-03T14:08:19.421397+01:00","seq":1,"type":"complete","put":["foo"]} {"timestamp":"2023-03-03T14:08:22.459238+01:00","seq":2,"type":"start","delete":["foo"]} {"timestamp":"2023-03-03T14:08:22.459268+01:00","seq":2,"type":"progress","action":"delete","path":"foo","progress":0} {"timestamp":"2023-03-03T14:08:22.686413+01:00","seq":2,"type":"progress","action":"delete","path":"foo","progress":1} {"timestamp":"2023-03-03T14:08:22.688989+01:00","seq":2,"type":"complete","delete":["foo"]} ``` --------- Co-authored-by: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com>
2023-03-08 09:27:19 +00:00
"github.com/databricks/bricks/libs/flags"
"github.com/databricks/bricks/libs/sync"
"github.com/databricks/databricks-sdk-go"
2022-07-07 18:56:59 +00:00
"github.com/spf13/cobra"
)
// syncOptionsFromBundle derives the sync options from the configuration of
// bundle b. The local and remote paths come from the bundle, so passing any
// positional argument is an error. Snapshots are stored under the bundle's
// cache directory.
func syncOptionsFromBundle(cmd *cobra.Command, args []string, b *bundle.Bundle) (*sync.SyncOptions, error) {
	if len(args) != 0 {
		return nil, fmt.Errorf("SRC and DST are not configurable in the context of a bundle")
	}

	snapshotDir, err := b.CacheDir()
	if err != nil {
		return nil, fmt.Errorf("cannot get bundle cache directory: %w", err)
	}

	return &sync.SyncOptions{
		LocalPath:    b.Config.Path,
		RemotePath:   b.Config.Workspace.FilePath.Workspace,
		Full:         full,
		PollInterval: interval,

		SnapshotBasePath: snapshotDir,
		WorkspaceClient:  b.WorkspaceClient(),
	}, nil
}
// syncOptionsFromArgs builds the sync options from the positional SRC and DST
// arguments, for use when the command does not run in the context of a bundle.
//
// It returns flag.ErrHelp unless exactly two arguments are given, and an error
// if the workspace client cannot be created.
func syncOptionsFromArgs(cmd *cobra.Command, args []string) (*sync.SyncOptions, error) {
	if len(args) != 2 {
		return nil, flag.ErrHelp
	}

	// Hand a client configuration error back to the caller rather than
	// panicking through databricks.Must; this function already returns an
	// error, and the completion handler treats the same failure as an error.
	w, err := databricks.NewWorkspaceClient()
	if err != nil {
		return nil, fmt.Errorf("cannot create workspace client: %w", err)
	}

	opts := sync.SyncOptions{
		LocalPath:    args[0],
		RemotePath:   args[1],
		Full:         full,
		PollInterval: interval,

		// We keep existing behavior for VS Code extension where if there is
		// no bundle defined, we store the snapshots in `.databricks`.
		// The sync code will automatically create this directory if it doesn't
		// exist and add it to the `.gitignore` file in the root.
		SnapshotBasePath: filepath.Join(args[0], ".databricks"),
		WorkspaceClient:  w,
	}
	return &opts, nil
}
2022-07-07 18:56:59 +00:00
var syncCmd = &cobra.Command{
Use: "sync [flags] SRC DST",
Short: "Synchronize a local directory to a workspace directory",
Args: cobra.MaximumNArgs(2),
// PreRunE: root.TryConfigureBundle,
2022-07-07 18:56:59 +00:00
RunE: func(cmd *cobra.Command, args []string) error {
var opts *sync.SyncOptions
var err error
//
// To be uncommented and used once our VS Code extension is bundle aware.
// Until then, this could interfere with extension usage where a `bundle.yml` file is present.
// See https://github.com/databricks/bricks/pull/207.
//
// b := bundle.GetOrNil(cmd.Context())
// if b != nil {
// // Run initialize phase to make sure paths are set.
// err = bundle.Apply(cmd.Context(), b, []bundle.Mutator{
// phases.Initialize(),
// })
// if err != nil {
// return err
// }
// opts, err = syncOptionsFromBundle(cmd, args, b)
// } else {
opts, err = syncOptionsFromArgs(cmd, args)
// }
if err != nil {
return err
}
ctx := cmd.Context()
s, err := sync.New(ctx, *opts)
if err != nil {
return err
}
var outputFunc func(context.Context, <-chan sync.Event, io.Writer)
Add optional JSON output for sync command (#230) JSON output makes it easy to process synchronization progress information in downstream tools (e.g. the vscode extension). This changes introduces a `sync.Event` interface type for progress events as well as an `sync.EventNotifier` that lets the sync code pass along progress events to calling code. Example output in text mode (default, this uses the existing logger calls): ```text 2023/03/03 14:07:17 [INFO] Remote file sync location: /Repos/pieter.noordhuis@databricks.com/... 2023/03/03 14:07:18 [INFO] Initial Sync Complete 2023/03/03 14:07:22 [INFO] Action: PUT: foo 2023/03/03 14:07:23 [INFO] Uploaded foo 2023/03/03 14:07:23 [INFO] Complete 2023/03/03 14:07:25 [INFO] Action: DELETE: foo 2023/03/03 14:07:25 [INFO] Deleted foo 2023/03/03 14:07:25 [INFO] Complete ``` Example output in JSON mode: ```json {"timestamp":"2023-03-03T14:08:15.459439+01:00","seq":0,"type":"start"} {"timestamp":"2023-03-03T14:08:15.459461+01:00","seq":0,"type":"complete"} {"timestamp":"2023-03-03T14:08:18.459821+01:00","seq":1,"type":"start","put":["foo"]} {"timestamp":"2023-03-03T14:08:18.459867+01:00","seq":1,"type":"progress","action":"put","path":"foo","progress":0} {"timestamp":"2023-03-03T14:08:19.418696+01:00","seq":1,"type":"progress","action":"put","path":"foo","progress":1} {"timestamp":"2023-03-03T14:08:19.421397+01:00","seq":1,"type":"complete","put":["foo"]} {"timestamp":"2023-03-03T14:08:22.459238+01:00","seq":2,"type":"start","delete":["foo"]} {"timestamp":"2023-03-03T14:08:22.459268+01:00","seq":2,"type":"progress","action":"delete","path":"foo","progress":0} {"timestamp":"2023-03-03T14:08:22.686413+01:00","seq":2,"type":"progress","action":"delete","path":"foo","progress":1} {"timestamp":"2023-03-03T14:08:22.688989+01:00","seq":2,"type":"complete","delete":["foo"]} ``` --------- Co-authored-by: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com>
2023-03-08 09:27:19 +00:00
switch output {
case flags.OutputText:
outputFunc = textOutput
Add optional JSON output for sync command (#230) JSON output makes it easy to process synchronization progress information in downstream tools (e.g. the vscode extension). This changes introduces a `sync.Event` interface type for progress events as well as an `sync.EventNotifier` that lets the sync code pass along progress events to calling code. Example output in text mode (default, this uses the existing logger calls): ```text 2023/03/03 14:07:17 [INFO] Remote file sync location: /Repos/pieter.noordhuis@databricks.com/... 2023/03/03 14:07:18 [INFO] Initial Sync Complete 2023/03/03 14:07:22 [INFO] Action: PUT: foo 2023/03/03 14:07:23 [INFO] Uploaded foo 2023/03/03 14:07:23 [INFO] Complete 2023/03/03 14:07:25 [INFO] Action: DELETE: foo 2023/03/03 14:07:25 [INFO] Deleted foo 2023/03/03 14:07:25 [INFO] Complete ``` Example output in JSON mode: ```json {"timestamp":"2023-03-03T14:08:15.459439+01:00","seq":0,"type":"start"} {"timestamp":"2023-03-03T14:08:15.459461+01:00","seq":0,"type":"complete"} {"timestamp":"2023-03-03T14:08:18.459821+01:00","seq":1,"type":"start","put":["foo"]} {"timestamp":"2023-03-03T14:08:18.459867+01:00","seq":1,"type":"progress","action":"put","path":"foo","progress":0} {"timestamp":"2023-03-03T14:08:19.418696+01:00","seq":1,"type":"progress","action":"put","path":"foo","progress":1} {"timestamp":"2023-03-03T14:08:19.421397+01:00","seq":1,"type":"complete","put":["foo"]} {"timestamp":"2023-03-03T14:08:22.459238+01:00","seq":2,"type":"start","delete":["foo"]} {"timestamp":"2023-03-03T14:08:22.459268+01:00","seq":2,"type":"progress","action":"delete","path":"foo","progress":0} {"timestamp":"2023-03-03T14:08:22.686413+01:00","seq":2,"type":"progress","action":"delete","path":"foo","progress":1} {"timestamp":"2023-03-03T14:08:22.688989+01:00","seq":2,"type":"complete","delete":["foo"]} ``` --------- Co-authored-by: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com>
2023-03-08 09:27:19 +00:00
case flags.OutputJSON:
outputFunc = jsonOutput
}
var wg stdsync.WaitGroup
if outputFunc != nil {
wg.Add(1)
go func() {
defer wg.Done()
outputFunc(ctx, s.Events(), cmd.OutOrStdout())
}()
Add optional JSON output for sync command (#230) JSON output makes it easy to process synchronization progress information in downstream tools (e.g. the vscode extension). This changes introduces a `sync.Event` interface type for progress events as well as an `sync.EventNotifier` that lets the sync code pass along progress events to calling code. Example output in text mode (default, this uses the existing logger calls): ```text 2023/03/03 14:07:17 [INFO] Remote file sync location: /Repos/pieter.noordhuis@databricks.com/... 2023/03/03 14:07:18 [INFO] Initial Sync Complete 2023/03/03 14:07:22 [INFO] Action: PUT: foo 2023/03/03 14:07:23 [INFO] Uploaded foo 2023/03/03 14:07:23 [INFO] Complete 2023/03/03 14:07:25 [INFO] Action: DELETE: foo 2023/03/03 14:07:25 [INFO] Deleted foo 2023/03/03 14:07:25 [INFO] Complete ``` Example output in JSON mode: ```json {"timestamp":"2023-03-03T14:08:15.459439+01:00","seq":0,"type":"start"} {"timestamp":"2023-03-03T14:08:15.459461+01:00","seq":0,"type":"complete"} {"timestamp":"2023-03-03T14:08:18.459821+01:00","seq":1,"type":"start","put":["foo"]} {"timestamp":"2023-03-03T14:08:18.459867+01:00","seq":1,"type":"progress","action":"put","path":"foo","progress":0} {"timestamp":"2023-03-03T14:08:19.418696+01:00","seq":1,"type":"progress","action":"put","path":"foo","progress":1} {"timestamp":"2023-03-03T14:08:19.421397+01:00","seq":1,"type":"complete","put":["foo"]} {"timestamp":"2023-03-03T14:08:22.459238+01:00","seq":2,"type":"start","delete":["foo"]} {"timestamp":"2023-03-03T14:08:22.459268+01:00","seq":2,"type":"progress","action":"delete","path":"foo","progress":0} {"timestamp":"2023-03-03T14:08:22.686413+01:00","seq":2,"type":"progress","action":"delete","path":"foo","progress":1} {"timestamp":"2023-03-03T14:08:22.688989+01:00","seq":2,"type":"complete","delete":["foo"]} ``` --------- Co-authored-by: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com>
2023-03-08 09:27:19 +00:00
}
if watch {
err = s.RunContinuous(ctx)
} else {
err = s.RunOnce(ctx)
}
s.Close()
wg.Wait()
return err
2022-07-07 18:56:59 +00:00
},
ValidArgsFunction: func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) {
err := root.TryConfigureBundle(cmd, args)
if err != nil {
return nil, cobra.ShellCompDirectiveError
}
// No completion in the context of a bundle.
// Source and destination paths are taken from bundle configuration.
b := bundle.GetOrNil(cmd.Context())
if b != nil {
return nil, cobra.ShellCompDirectiveNoFileComp
}
switch len(args) {
case 0:
return nil, cobra.ShellCompDirectiveFilterDirs
case 1:
wsc, err := databricks.NewWorkspaceClient()
if err != nil {
return nil, cobra.ShellCompDirectiveError
}
return completeRemotePath(cmd.Context(), wsc, toComplete)
default:
return nil, cobra.ShellCompDirectiveNoFileComp
}
},
2022-07-07 18:56:59 +00:00
}
// project files polling interval
var interval time.Duration
var full bool
var watch bool
Add optional JSON output for sync command (#230) JSON output makes it easy to process synchronization progress information in downstream tools (e.g. the vscode extension). This changes introduces a `sync.Event` interface type for progress events as well as an `sync.EventNotifier` that lets the sync code pass along progress events to calling code. Example output in text mode (default, this uses the existing logger calls): ```text 2023/03/03 14:07:17 [INFO] Remote file sync location: /Repos/pieter.noordhuis@databricks.com/... 2023/03/03 14:07:18 [INFO] Initial Sync Complete 2023/03/03 14:07:22 [INFO] Action: PUT: foo 2023/03/03 14:07:23 [INFO] Uploaded foo 2023/03/03 14:07:23 [INFO] Complete 2023/03/03 14:07:25 [INFO] Action: DELETE: foo 2023/03/03 14:07:25 [INFO] Deleted foo 2023/03/03 14:07:25 [INFO] Complete ``` Example output in JSON mode: ```json {"timestamp":"2023-03-03T14:08:15.459439+01:00","seq":0,"type":"start"} {"timestamp":"2023-03-03T14:08:15.459461+01:00","seq":0,"type":"complete"} {"timestamp":"2023-03-03T14:08:18.459821+01:00","seq":1,"type":"start","put":["foo"]} {"timestamp":"2023-03-03T14:08:18.459867+01:00","seq":1,"type":"progress","action":"put","path":"foo","progress":0} {"timestamp":"2023-03-03T14:08:19.418696+01:00","seq":1,"type":"progress","action":"put","path":"foo","progress":1} {"timestamp":"2023-03-03T14:08:19.421397+01:00","seq":1,"type":"complete","put":["foo"]} {"timestamp":"2023-03-03T14:08:22.459238+01:00","seq":2,"type":"start","delete":["foo"]} {"timestamp":"2023-03-03T14:08:22.459268+01:00","seq":2,"type":"progress","action":"delete","path":"foo","progress":0} {"timestamp":"2023-03-03T14:08:22.686413+01:00","seq":2,"type":"progress","action":"delete","path":"foo","progress":1} {"timestamp":"2023-03-03T14:08:22.688989+01:00","seq":2,"type":"complete","delete":["foo"]} ``` --------- Co-authored-by: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com>
2023-03-08 09:27:19 +00:00
var output flags.Output = flags.OutputText
2022-07-07 18:56:59 +00:00
func init() {
root.RootCmd.AddCommand(syncCmd)
syncCmd.Flags().DurationVar(&interval, "interval", 1*time.Second, "file system polling interval (for --watch)")
syncCmd.Flags().BoolVar(&full, "full", false, "perform full synchronization (default is incremental)")
syncCmd.Flags().BoolVar(&watch, "watch", false, "watch local file system for changes")
Add optional JSON output for sync command (#230) JSON output makes it easy to process synchronization progress information in downstream tools (e.g. the vscode extension). This changes introduces a `sync.Event` interface type for progress events as well as an `sync.EventNotifier` that lets the sync code pass along progress events to calling code. Example output in text mode (default, this uses the existing logger calls): ```text 2023/03/03 14:07:17 [INFO] Remote file sync location: /Repos/pieter.noordhuis@databricks.com/... 2023/03/03 14:07:18 [INFO] Initial Sync Complete 2023/03/03 14:07:22 [INFO] Action: PUT: foo 2023/03/03 14:07:23 [INFO] Uploaded foo 2023/03/03 14:07:23 [INFO] Complete 2023/03/03 14:07:25 [INFO] Action: DELETE: foo 2023/03/03 14:07:25 [INFO] Deleted foo 2023/03/03 14:07:25 [INFO] Complete ``` Example output in JSON mode: ```json {"timestamp":"2023-03-03T14:08:15.459439+01:00","seq":0,"type":"start"} {"timestamp":"2023-03-03T14:08:15.459461+01:00","seq":0,"type":"complete"} {"timestamp":"2023-03-03T14:08:18.459821+01:00","seq":1,"type":"start","put":["foo"]} {"timestamp":"2023-03-03T14:08:18.459867+01:00","seq":1,"type":"progress","action":"put","path":"foo","progress":0} {"timestamp":"2023-03-03T14:08:19.418696+01:00","seq":1,"type":"progress","action":"put","path":"foo","progress":1} {"timestamp":"2023-03-03T14:08:19.421397+01:00","seq":1,"type":"complete","put":["foo"]} {"timestamp":"2023-03-03T14:08:22.459238+01:00","seq":2,"type":"start","delete":["foo"]} {"timestamp":"2023-03-03T14:08:22.459268+01:00","seq":2,"type":"progress","action":"delete","path":"foo","progress":0} {"timestamp":"2023-03-03T14:08:22.686413+01:00","seq":2,"type":"progress","action":"delete","path":"foo","progress":1} {"timestamp":"2023-03-03T14:08:22.688989+01:00","seq":2,"type":"complete","delete":["foo"]} ``` --------- Co-authored-by: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com>
2023-03-08 09:27:19 +00:00
syncCmd.Flags().Var(&output, "output", "type of output format")
2022-07-07 18:56:59 +00:00
}