2022-07-07 18:56:59 +00:00
|
|
|
package sync
|
|
|
|
|
|
|
|
import (
|
2022-10-19 14:22:55 +00:00
|
|
|
"context"
|
|
|
|
"crypto/md5"
|
|
|
|
"encoding/hex"
|
2022-09-14 15:50:29 +00:00
|
|
|
"encoding/json"
|
2024-06-03 12:39:36 +00:00
|
|
|
"errors"
|
2022-07-07 18:56:59 +00:00
|
|
|
"fmt"
|
2024-06-03 12:39:36 +00:00
|
|
|
"io/fs"
|
2022-09-14 15:50:29 +00:00
|
|
|
"os"
|
|
|
|
"path/filepath"
|
2022-07-07 18:56:59 +00:00
|
|
|
"time"
|
|
|
|
|
2023-05-16 16:35:39 +00:00
|
|
|
"github.com/databricks/cli/libs/fileset"
|
|
|
|
"github.com/databricks/cli/libs/log"
|
2022-07-07 18:56:59 +00:00
|
|
|
)
|
|
|
|
|
2022-12-12 13:31:06 +00:00
|
|
|
// Bump it up every time a potentially breaking change is made to the snapshot schema
|
|
|
|
// LatestSnapshotVersion is the schema version stamped into every snapshot
// created by this package. A snapshot read from disk whose version differs
// from it is not used.
const LatestSnapshotVersion = "v1"
|
|
|
|
|
2023-05-16 16:35:39 +00:00
|
|
|
// A snapshot is a persistant store of knowledge this CLI has about state of files
|
2022-10-19 14:22:55 +00:00
|
|
|
// in the remote repo. We use the last modified times (mtime) of files to determine
|
|
|
|
// whether a files need to be updated in the remote repo.
|
|
|
|
//
|
|
|
|
// 1. Any stale files in the remote repo are updated. That is if the last modified
|
|
|
|
// time recorded in the snapshot is less than the actual last modified time of the file
|
|
|
|
//
|
|
|
|
// 2. Any files present in snapshot but absent locally are deleted from remote path
|
|
|
|
//
|
|
|
|
// Changing either the databricks workspace (ie Host) or the remote path (ie RemotePath)
|
2023-05-16 16:35:39 +00:00
|
|
|
// local files are being synced to will make this CLI switch to a different
|
2022-10-19 14:22:55 +00:00
|
|
|
// snapshot for persisting/loading sync state
|
|
|
|
type Snapshot struct {
|
2023-01-24 07:30:10 +00:00
|
|
|
// Path where this snapshot was loaded from and will be saved to.
|
|
|
|
// Intentionally not part of the snapshot state because it may be moved by the user.
|
2024-06-18 14:14:27 +00:00
|
|
|
snapshotPath string
|
2023-01-24 07:30:10 +00:00
|
|
|
|
|
|
|
// New indicates if this is a fresh snapshot or if it was loaded from disk.
|
|
|
|
New bool `json:"-"`
|
|
|
|
|
2022-12-12 13:31:06 +00:00
|
|
|
// version for snapshot schema. Only snapshots matching the latest snapshot
|
|
|
|
// schema version are used and older ones are invalidated (by deleting them)
|
|
|
|
Version string `json:"version"`
|
|
|
|
|
2022-10-19 14:22:55 +00:00
|
|
|
// hostname of the workspace this snapshot is for
|
|
|
|
Host string `json:"host"`
|
2022-12-12 13:31:06 +00:00
|
|
|
|
2022-10-19 14:22:55 +00:00
|
|
|
// Path in workspace for project repo
|
|
|
|
RemotePath string `json:"remote_path"`
|
2022-12-12 13:31:06 +00:00
|
|
|
|
2023-10-03 13:47:46 +00:00
|
|
|
*SnapshotState
|
2022-10-19 14:22:55 +00:00
|
|
|
}
|
2022-07-07 18:56:59 +00:00
|
|
|
|
2022-10-19 14:22:55 +00:00
|
|
|
// syncSnapshotDirName is the name of the directory, located directly under
// SyncOptions.SnapshotBasePath, in which snapshot files are stored.
const syncSnapshotDirName = "sync-snapshots"
|
2022-09-14 15:50:29 +00:00
|
|
|
|
2024-03-18 14:41:58 +00:00
|
|
|
func NewSnapshot(localFiles []fileset.File, opts *SyncOptions) (*Snapshot, error) {
|
|
|
|
snapshotPath, err := SnapshotPath(opts)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
snapshotState, err := NewSnapshotState(localFiles)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Reset last modified times to make sure all files are synced
|
|
|
|
snapshotState.ResetLastModifiedTimes()
|
|
|
|
|
|
|
|
return &Snapshot{
|
2024-06-18 14:14:27 +00:00
|
|
|
snapshotPath: snapshotPath,
|
2024-03-18 14:41:58 +00:00
|
|
|
New: true,
|
|
|
|
Version: LatestSnapshotVersion,
|
|
|
|
Host: opts.Host,
|
|
|
|
RemotePath: opts.RemotePath,
|
|
|
|
SnapshotState: snapshotState,
|
|
|
|
}, nil
|
|
|
|
}
|
|
|
|
|
2022-10-19 14:22:55 +00:00
|
|
|
// GetFileName returns the snapshot file name for a (host, remotePath) pair:
// the first 16 hex characters of md5(host + remotePath) followed by ".json".
func GetFileName(host, remotePath string) string {
	digest := md5.Sum([]byte(host + remotePath))
	// The leading 8 digest bytes encode to the leading 16 hex characters.
	return hex.EncodeToString(digest[:8]) + ".json"
}
|
|
|
|
|
|
|
|
// Compute path of the snapshot file on the local machine
|
|
|
|
// The file name for unique for a tuple of (host, remotePath)
|
|
|
|
// precisely it's the first 16 characters of md5(concat(host, remotePath))
|
2023-01-24 07:30:10 +00:00
|
|
|
func SnapshotPath(opts *SyncOptions) (string, error) {
|
|
|
|
snapshotDir := filepath.Join(opts.SnapshotBasePath, syncSnapshotDirName)
|
2024-06-03 12:39:36 +00:00
|
|
|
if _, err := os.Stat(snapshotDir); errors.Is(err, fs.ErrNotExist) {
|
2023-02-20 10:33:30 +00:00
|
|
|
err = os.MkdirAll(snapshotDir, 0o755)
|
2022-09-14 15:50:29 +00:00
|
|
|
if err != nil {
|
2022-10-19 14:22:55 +00:00
|
|
|
return "", fmt.Errorf("failed to create config directory: %s", err)
|
2022-09-14 15:50:29 +00:00
|
|
|
}
|
|
|
|
}
|
2023-01-24 07:30:10 +00:00
|
|
|
fileName := GetFileName(opts.Host, opts.RemotePath)
|
2022-10-19 14:22:55 +00:00
|
|
|
return filepath.Join(snapshotDir, fileName), nil
|
|
|
|
}
|
|
|
|
|
2023-03-17 14:17:31 +00:00
|
|
|
func newSnapshot(ctx context.Context, opts *SyncOptions) (*Snapshot, error) {
|
2023-01-24 07:30:10 +00:00
|
|
|
path, err := SnapshotPath(opts)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
2022-10-19 14:22:55 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return &Snapshot{
|
2024-06-18 14:14:27 +00:00
|
|
|
snapshotPath: path,
|
2023-01-24 07:30:10 +00:00
|
|
|
New: true,
|
|
|
|
|
2023-10-03 13:47:46 +00:00
|
|
|
Version: LatestSnapshotVersion,
|
|
|
|
Host: opts.Host,
|
|
|
|
RemotePath: opts.RemotePath,
|
|
|
|
SnapshotState: &SnapshotState{
|
|
|
|
LastModifiedTimes: make(map[string]time.Time),
|
|
|
|
LocalToRemoteNames: make(map[string]string),
|
|
|
|
RemoteToLocalNames: make(map[string]string),
|
|
|
|
},
|
2022-10-19 14:22:55 +00:00
|
|
|
}, nil
|
|
|
|
}
|
|
|
|
|
2023-01-24 07:30:10 +00:00
|
|
|
func (s *Snapshot) Save(ctx context.Context) error {
|
2024-06-18 14:14:27 +00:00
|
|
|
f, err := os.OpenFile(s.snapshotPath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0o644)
|
2022-09-14 15:50:29 +00:00
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("failed to create/open persisted sync snapshot file: %s", err)
|
|
|
|
}
|
|
|
|
defer f.Close()
|
|
|
|
|
|
|
|
// persist snapshot to disk
|
|
|
|
bytes, err := json.MarshalIndent(s, "", " ")
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("failed to json marshal in-memory snapshot: %s", err)
|
|
|
|
}
|
|
|
|
_, err = f.Write(bytes)
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("failed to write sync snapshot to disk: %s", err)
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2023-03-17 14:17:31 +00:00
|
|
|
func loadOrNewSnapshot(ctx context.Context, opts *SyncOptions) (*Snapshot, error) {
|
|
|
|
snapshot, err := newSnapshot(ctx, opts)
|
2022-10-19 14:22:55 +00:00
|
|
|
if err != nil {
|
2023-01-24 07:30:10 +00:00
|
|
|
return nil, err
|
2022-09-14 15:50:29 +00:00
|
|
|
}
|
|
|
|
|
2023-01-24 07:30:10 +00:00
|
|
|
// Snapshot file not found. We return the new copy.
|
2024-06-18 14:14:27 +00:00
|
|
|
if _, err := os.Stat(snapshot.snapshotPath); errors.Is(err, fs.ErrNotExist) {
|
2023-01-24 07:30:10 +00:00
|
|
|
return snapshot, nil
|
|
|
|
}
|
2022-09-14 15:50:29 +00:00
|
|
|
|
2024-06-18 14:14:27 +00:00
|
|
|
bytes, err := os.ReadFile(snapshot.snapshotPath)
|
2022-09-14 15:50:29 +00:00
|
|
|
if err != nil {
|
2023-01-24 07:30:10 +00:00
|
|
|
return nil, fmt.Errorf("failed to read sync snapshot from disk: %s", err)
|
2022-09-14 15:50:29 +00:00
|
|
|
}
|
2023-01-24 07:30:10 +00:00
|
|
|
|
|
|
|
var fromDisk Snapshot
|
|
|
|
err = json.Unmarshal(bytes, &fromDisk)
|
2022-09-14 15:50:29 +00:00
|
|
|
if err != nil {
|
2023-01-24 07:30:10 +00:00
|
|
|
return nil, fmt.Errorf("failed to json unmarshal persisted snapshot: %s", err)
|
2022-09-14 15:50:29 +00:00
|
|
|
}
|
2023-01-24 07:30:10 +00:00
|
|
|
|
2022-12-12 13:31:06 +00:00
|
|
|
// invalidate old snapshot with schema versions
|
2023-01-24 07:30:10 +00:00
|
|
|
if fromDisk.Version != LatestSnapshotVersion {
|
2023-03-17 14:17:31 +00:00
|
|
|
log.Warnf(ctx, "Did not load existing snapshot because its version is %s while the latest version is %s", snapshot.Version, LatestSnapshotVersion)
|
|
|
|
return newSnapshot(ctx, opts)
|
2023-01-24 07:30:10 +00:00
|
|
|
}
|
2022-12-12 13:31:06 +00:00
|
|
|
|
2023-01-24 07:30:10 +00:00
|
|
|
// unmarshal again over the existing snapshot instance
|
|
|
|
err = json.Unmarshal(bytes, &snapshot)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("failed to json unmarshal persisted snapshot: %s", err)
|
2022-12-12 13:31:06 +00:00
|
|
|
}
|
2023-01-24 07:30:10 +00:00
|
|
|
|
2024-05-30 07:41:50 +00:00
|
|
|
// Ensure that all paths are slash-separated upon loading
|
|
|
|
// an existing snapshot file. If it was created by an older
|
|
|
|
// CLI version (<= v0.220.0), it may contain backslashes.
|
|
|
|
snapshot.SnapshotState = snapshot.SnapshotState.ToSlash()
|
|
|
|
|
2023-01-24 07:30:10 +00:00
|
|
|
snapshot.New = false
|
|
|
|
return snapshot, nil
|
2022-09-14 15:50:29 +00:00
|
|
|
}
|
|
|
|
|
2023-10-03 13:47:46 +00:00
|
|
|
func (s *Snapshot) diff(ctx context.Context, all []fileset.File) (diff, error) {
|
|
|
|
targetState, err := NewSnapshotState(all)
|
|
|
|
if err != nil {
|
|
|
|
return diff{}, fmt.Errorf("error while computing new sync state: %w", err)
|
2022-07-07 18:56:59 +00:00
|
|
|
}
|
2022-12-12 13:31:06 +00:00
|
|
|
|
2023-10-03 13:47:46 +00:00
|
|
|
currentState := s.SnapshotState
|
|
|
|
if err := currentState.validate(); err != nil {
|
2024-06-18 14:14:27 +00:00
|
|
|
return diff{}, fmt.Errorf("error parsing existing sync state. Please delete your existing sync snapshot file (%s) and retry: %w", s.snapshotPath, err)
|
2022-07-07 18:56:59 +00:00
|
|
|
}
|
2023-06-12 11:44:00 +00:00
|
|
|
|
2023-10-03 13:47:46 +00:00
|
|
|
// Compute diff to apply to get from current state to new target state.
|
|
|
|
diff := computeDiff(targetState, currentState)
|
2023-06-12 11:44:00 +00:00
|
|
|
|
2023-10-03 13:47:46 +00:00
|
|
|
// Update state to new value. This is not persisted to the file system before
|
|
|
|
// the diff is applied successfully.
|
|
|
|
s.SnapshotState = targetState
|
|
|
|
return diff, nil
|
2022-07-07 18:56:59 +00:00
|
|
|
}
|