databricks-cli/cmd/sync/sync.go

159 lines
3.9 KiB
Go

package sync
import (
"context"
"fmt"
"log"
"strings"
"time"
"github.com/databricks/bricks/cmd/root"
"github.com/databricks/bricks/git"
"github.com/databricks/bricks/libs/sync"
"github.com/databricks/bricks/project"
"github.com/databricks/databricks-sdk-go"
"github.com/databricks/databricks-sdk-go/apierr"
"github.com/databricks/databricks-sdk-go/service/scim"
"github.com/databricks/databricks-sdk-go/service/workspace"
"github.com/spf13/cobra"
)
// matchesBasePaths checks that path is nested under one of the base paths
// derived from the user's name: /Users/<UserName>/ or /Repos/<UserName>/.
//
// It returns nil when path starts with either prefix, and otherwise an error
// that names both accepted prefixes.
func matchesBasePaths(me *scim.User, path string) error {
	userBase := fmt.Sprintf("/Users/%s/", me.UserName)
	repoBase := fmt.Sprintf("/Repos/%s/", me.UserName)

	if strings.HasPrefix(path, userBase) || strings.HasPrefix(path, repoBase) {
		return nil
	}
	return fmt.Errorf("path must be nested under %s or %s", userBase, repoBase)
}
// ensureRemotePathIsUsable verifies that path is nested under one of the
// user's base paths and that the object found there is a directory or a repo.
//
// A missing object is handled according to the path's top-level prefix:
//   - under /Repos/ it is an error; the repo has to be created beforehand;
//   - under /Users/ the directory is created and its status is fetched again.
//
// Any lookup error other than "missing" is returned unchanged.
func ensureRemotePathIsUsable(ctx context.Context, wsc *databricks.WorkspaceClient, me *scim.User, path string) error {
	if err := matchesBasePaths(me, path); err != nil {
		return err
	}

	// Look up the remote object; only a "missing" failure is recoverable.
	info, err := wsc.Workspace.GetStatusByPath(ctx, path)
	if err != nil {
		if !apierr.IsMissing(err) {
			return err
		}
		if strings.HasPrefix(path, "/Repos/") {
			return fmt.Errorf("%s does not exist; please create it first", path)
		}
		if !strings.HasPrefix(path, "/Users/") {
			return err
		}

		// The workspace path doesn't exist yet: create it, then look it up again.
		if mkErr := wsc.Workspace.MkdirsByPath(ctx, path); mkErr != nil {
			return fmt.Errorf("unable to create directory at %s: %w", path, mkErr)
		}
		info, err = wsc.Workspace.GetStatusByPath(ctx, path)
		if err != nil {
			return err
		}
	}

	typeName := strings.ToLower(info.ObjectType.String())
	log.Printf("[DEBUG] Path %s has type %s (ID: %d)", info.Path, typeName, info.ObjectId)

	// Only directories and repos are acceptable sync targets.
	if info.ObjectType == workspace.ObjectTypeDirectory || info.ObjectType == workspace.ObjectTypeRepo {
		return nil
	}
	return fmt.Errorf("%s points to a %s", path, typeName)
}
// syncCmd represents the sync command.
//
// Its RunE resolves the remote path (falling back to
// /Repos/<UserName>/<repository name> when the --remote-path flag is empty),
// checks that the path is usable, builds a syncer from the project settings
// and flag values, and then hands control to the syncer's RunWatchdog.
var syncCmd = &cobra.Command{
	Use:     "sync",
	Short:   "run syncs for the project",
	PreRunE: project.Configure,
	RunE: func(cmd *cobra.Command, args []string) error {
		ctx := cmd.Context()
		prj := project.Get(ctx)
		client := prj.WorkspacesClient()

		user, err := prj.Me()
		if err != nil {
			return err
		}

		// Without an explicit remote path, derive one from the user name and
		// the name reported by git.RepositoryName. The result is stored back
		// into the flag variable.
		if *remotePath == "" {
			repoName, nameErr := git.RepositoryName()
			if nameErr != nil {
				return nameErr
			}
			*remotePath = fmt.Sprintf("/Repos/%s/%s", user.UserName, repoName)
		}

		log.Printf("[INFO] Remote file sync location: %v", *remotePath)
		if checkErr := ensureRemotePathIsUsable(ctx, client, user, *remotePath); checkErr != nil {
			return checkErr
		}

		snapshotDir, err := prj.CacheDir()
		if err != nil {
			return err
		}

		syncer, err := sync.New(sync.SyncOptions{
			LocalPath:        prj.Root(),
			RemotePath:       *remotePath,
			PersistSnapshot:  *persistSnapshot,
			SnapshotBasePath: snapshotDir,
			PollInterval:     *interval,
			WorkspaceClient:  client,
		})
		if err != nil {
			return err
		}
		return syncer.RunWatchdog(ctx)
	},
}
// Flag values for the sync command. Each pointer is nil until init binds it
// to the corresponding command-line flag.
var (
	// interval is the project files polling interval.
	interval *time.Duration
	// remotePath is the remote path to store the repo in.
	remotePath *string
	// persistSnapshot tells whether to store local snapshots of sync state.
	persistSnapshot *bool
)
// init attaches syncCmd to the root command and binds the package-level flag
// variables to the command's --interval, --remote-path and --persist-snapshot
// flags.
func init() {
	root.RootCmd.AddCommand(syncCmd)

	flags := syncCmd.Flags()
	interval = flags.Duration("interval", time.Second, "project files polling interval")
	remotePath = flags.String("remote-path", "", "remote path to store repo in. eg: /Repos/me@example.com/test-repo")
	persistSnapshot = flags.Bool("persist-snapshot", true, "whether to store local snapshots of sync state")
}