2023-06-16 15:09:08 +00:00
|
|
|
package fs
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
"errors"
|
|
|
|
"fmt"
|
|
|
|
"io/fs"
|
|
|
|
"path"
|
|
|
|
"path/filepath"
|
|
|
|
|
|
|
|
"github.com/databricks/cli/cmd/root"
|
|
|
|
"github.com/databricks/cli/libs/cmdio"
|
|
|
|
"github.com/databricks/cli/libs/filer"
|
|
|
|
"github.com/spf13/cobra"
|
|
|
|
)
|
|
|
|
|
|
|
|
type copy struct {
|
2023-07-27 10:03:08 +00:00
|
|
|
overwrite bool
|
|
|
|
recursive bool
|
|
|
|
|
2023-06-27 12:42:27 +00:00
|
|
|
ctx context.Context
|
|
|
|
sourceFiler filer.Filer
|
|
|
|
targetFiler filer.Filer
|
|
|
|
sourceScheme string
|
|
|
|
targetScheme string
|
2023-06-16 15:09:08 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (c *copy) cpWriteCallback(sourceDir, targetDir string) fs.WalkDirFunc {
|
|
|
|
return func(sourcePath string, d fs.DirEntry, err error) error {
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Compute path relative to the target directory
|
|
|
|
relPath, err := filepath.Rel(sourceDir, sourcePath)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
relPath = filepath.ToSlash(relPath)
|
|
|
|
|
|
|
|
// Compute target path for the file
|
|
|
|
targetPath := path.Join(targetDir, relPath)
|
|
|
|
|
|
|
|
// create directory and return early
|
|
|
|
if d.IsDir() {
|
|
|
|
return c.targetFiler.Mkdir(c.ctx, targetPath)
|
|
|
|
}
|
|
|
|
|
|
|
|
return c.cpFileToFile(sourcePath, targetPath)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *copy) cpDirToDir(sourceDir, targetDir string) error {
|
2023-07-27 10:03:08 +00:00
|
|
|
if !c.recursive {
|
2023-06-16 15:09:08 +00:00
|
|
|
return fmt.Errorf("source path %s is a directory. Please specify the --recursive flag", sourceDir)
|
|
|
|
}
|
|
|
|
|
|
|
|
sourceFs := filer.NewFS(c.ctx, c.sourceFiler)
|
|
|
|
return fs.WalkDir(sourceFs, sourceDir, c.cpWriteCallback(sourceDir, targetDir))
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *copy) cpFileToDir(sourcePath, targetDir string) error {
|
2024-01-11 18:49:42 +00:00
|
|
|
fileName := filepath.Base(sourcePath)
|
2023-06-16 15:09:08 +00:00
|
|
|
targetPath := path.Join(targetDir, fileName)
|
|
|
|
|
|
|
|
return c.cpFileToFile(sourcePath, targetPath)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *copy) cpFileToFile(sourcePath, targetPath string) error {
|
|
|
|
// Get reader for file at source path
|
|
|
|
r, err := c.sourceFiler.Read(c.ctx, sourcePath)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
defer r.Close()
|
|
|
|
|
2023-07-27 10:03:08 +00:00
|
|
|
if c.overwrite {
|
2023-06-16 15:09:08 +00:00
|
|
|
err = c.targetFiler.Write(c.ctx, targetPath, r, filer.OverwriteIfExists)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
err = c.targetFiler.Write(c.ctx, targetPath, r)
|
|
|
|
// skip if file already exists
|
|
|
|
if err != nil && errors.Is(err, fs.ErrExist) {
|
2023-06-27 12:42:27 +00:00
|
|
|
return c.emitFileSkippedEvent(sourcePath, targetPath)
|
2023-06-16 15:09:08 +00:00
|
|
|
}
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
2023-06-27 12:42:27 +00:00
|
|
|
return c.emitFileCopiedEvent(sourcePath, targetPath)
|
2023-06-16 15:09:08 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// TODO: emit these events on stderr
|
|
|
|
// TODO: add integration tests for these events
|
2023-06-27 12:42:27 +00:00
|
|
|
func (c *copy) emitFileSkippedEvent(sourcePath, targetPath string) error {
|
|
|
|
fullSourcePath := sourcePath
|
|
|
|
if c.sourceScheme != "" {
|
|
|
|
fullSourcePath = path.Join(c.sourceScheme+":", sourcePath)
|
|
|
|
}
|
|
|
|
fullTargetPath := targetPath
|
|
|
|
if c.targetScheme != "" {
|
|
|
|
fullTargetPath = path.Join(c.targetScheme+":", targetPath)
|
|
|
|
}
|
|
|
|
|
|
|
|
event := newFileSkippedEvent(fullSourcePath, fullTargetPath)
|
2023-06-16 15:09:08 +00:00
|
|
|
template := "{{.SourcePath}} -> {{.TargetPath}} (skipped; already exists)\n"
|
|
|
|
|
Use Go SDK Iterators when listing resources with the CLI (#1202)
## Changes
Currently, when the CLI run a list API call (like list jobs), it uses
the `List*All` methods from the SDK, which list all resources in the
collection. This is very slow for large collections: if you need to list
all jobs from a workspace that has 10,000+ jobs, you'll be waiting for
at least 100 RPCs to complete before seeing any output.
Instead of using List*All() methods, the SDK recently added an iterator
data structure that allows traversing the collection without needing to
completely list it first. New pages are fetched lazily if the next
requested item belongs to the next page. Using the List() methods that
return these iterators, the CLI can proactively print out some of the
response before the complete collection has been fetched.
This involves a pretty major rewrite of the rendering logic in `cmdio`.
The idea there is to define custom rendering logic based on the type of
the provided resource. There are three renderer interfaces:
1. textRenderer: supports printing something in a textual format (i.e.
not JSON, and not templated).
2. jsonRenderer: supports printing something in a pretty-printed JSON
format.
3. templateRenderer: supports printing something using a text template.
There are also three renderer implementations:
1. readerRenderer: supports printing a reader. This only implements the
textRenderer interface.
2. iteratorRenderer: supports printing a `listing.Iterator` from the Go
SDK. This implements jsonRenderer and templateRenderer, buffering 20
resources at a time before writing them to the output.
3. defaultRenderer: supports printing arbitrary resources (the previous
implementation).
Callers will either use `cmdio.Render()` for rendering individual
resources or `io.Reader` or `cmdio.RenderIterator()` for rendering an
iterator. This separate method is needed to safely be able to match on
the type of the iterator, since Go does not allow runtime type matches
on generic types with an existential type parameter.
One other change that needs to happen is to split the templates used for
text representation of list resources into a header template and a row
template. The template is now executed multiple times for List API
calls, but the header should only be printed once. To support this, I
have added `headerTemplate` to `cmdIO`, and I have also changed
`RenderWithTemplate` to include a `headerTemplate` parameter everywhere.
## Tests
- [x] Unit tests for text rendering logic
- [x] Unit test for reflection-based iterator construction.
---------
Co-authored-by: Andrew Nester <andrew.nester@databricks.com>
2024-02-21 14:16:36 +00:00
|
|
|
return cmdio.RenderWithTemplate(c.ctx, event, "", template)
|
2023-06-16 15:09:08 +00:00
|
|
|
}
|
|
|
|
|
2023-06-27 12:42:27 +00:00
|
|
|
func (c *copy) emitFileCopiedEvent(sourcePath, targetPath string) error {
|
|
|
|
fullSourcePath := sourcePath
|
|
|
|
if c.sourceScheme != "" {
|
|
|
|
fullSourcePath = path.Join(c.sourceScheme+":", sourcePath)
|
|
|
|
}
|
|
|
|
fullTargetPath := targetPath
|
|
|
|
if c.targetScheme != "" {
|
|
|
|
fullTargetPath = path.Join(c.targetScheme+":", targetPath)
|
|
|
|
}
|
|
|
|
|
|
|
|
event := newFileCopiedEvent(fullSourcePath, fullTargetPath)
|
2023-06-16 15:09:08 +00:00
|
|
|
template := "{{.SourcePath}} -> {{.TargetPath}}\n"
|
|
|
|
|
Use Go SDK Iterators when listing resources with the CLI (#1202)
## Changes
Currently, when the CLI run a list API call (like list jobs), it uses
the `List*All` methods from the SDK, which list all resources in the
collection. This is very slow for large collections: if you need to list
all jobs from a workspace that has 10,000+ jobs, you'll be waiting for
at least 100 RPCs to complete before seeing any output.
Instead of using List*All() methods, the SDK recently added an iterator
data structure that allows traversing the collection without needing to
completely list it first. New pages are fetched lazily if the next
requested item belongs to the next page. Using the List() methods that
return these iterators, the CLI can proactively print out some of the
response before the complete collection has been fetched.
This involves a pretty major rewrite of the rendering logic in `cmdio`.
The idea there is to define custom rendering logic based on the type of
the provided resource. There are three renderer interfaces:
1. textRenderer: supports printing something in a textual format (i.e.
not JSON, and not templated).
2. jsonRenderer: supports printing something in a pretty-printed JSON
format.
3. templateRenderer: supports printing something using a text template.
There are also three renderer implementations:
1. readerRenderer: supports printing a reader. This only implements the
textRenderer interface.
2. iteratorRenderer: supports printing a `listing.Iterator` from the Go
SDK. This implements jsonRenderer and templateRenderer, buffering 20
resources at a time before writing them to the output.
3. defaultRenderer: supports printing arbitrary resources (the previous
implementation).
Callers will either use `cmdio.Render()` for rendering individual
resources or `io.Reader` or `cmdio.RenderIterator()` for rendering an
iterator. This separate method is needed to safely be able to match on
the type of the iterator, since Go does not allow runtime type matches
on generic types with an existential type parameter.
One other change that needs to happen is to split the templates used for
text representation of list resources into a header template and a row
template. The template is now executed multiple times for List API
calls, but the header should only be printed once. To support this, I
have added `headerTemplate` to `cmdIO`, and I have also changed
`RenderWithTemplate` to include a `headerTemplate` parameter everywhere.
## Tests
- [x] Unit tests for text rendering logic
- [x] Unit test for reflection-based iterator construction.
---------
Co-authored-by: Andrew Nester <andrew.nester@databricks.com>
2024-02-21 14:16:36 +00:00
|
|
|
return cmdio.RenderWithTemplate(c.ctx, event, "", template)
|
2023-06-16 15:09:08 +00:00
|
|
|
}
|
|
|
|
|
2023-07-27 10:03:08 +00:00
|
|
|
func newCpCommand() *cobra.Command {
|
|
|
|
cmd := &cobra.Command{
|
|
|
|
Use: "cp SOURCE_PATH TARGET_PATH",
|
2024-02-20 16:14:37 +00:00
|
|
|
Short: "Copy files and directories.",
|
|
|
|
Long: `Copy files and directories to and from any paths on DBFS, UC Volumes or your local filesystem.
|
2023-06-16 15:09:08 +00:00
|
|
|
|
2024-02-20 16:14:37 +00:00
|
|
|
For paths in DBFS and UC Volumes, it is required that you specify the "dbfs" scheme.
|
2023-07-27 10:03:08 +00:00
|
|
|
For example: dbfs:/foo/bar.
|
2023-06-16 15:09:08 +00:00
|
|
|
|
2023-07-27 10:03:08 +00:00
|
|
|
Recursively copying a directory will copy all files inside directory
|
|
|
|
at SOURCE_PATH to the directory at TARGET_PATH.
|
2023-06-16 15:09:08 +00:00
|
|
|
|
2023-07-27 10:03:08 +00:00
|
|
|
When copying a file, if TARGET_PATH is a directory, the file will be created
|
|
|
|
inside the directory, otherwise the file is created at TARGET_PATH.
|
|
|
|
`,
|
|
|
|
Args: cobra.ExactArgs(2),
|
|
|
|
PreRunE: root.MustWorkspaceClient,
|
|
|
|
}
|
2023-06-16 15:09:08 +00:00
|
|
|
|
2023-07-27 10:03:08 +00:00
|
|
|
var c copy
|
|
|
|
cmd.Flags().BoolVar(&c.overwrite, "overwrite", false, "overwrite existing files")
|
|
|
|
cmd.Flags().BoolVarP(&c.recursive, "recursive", "r", false, "recursively copy files from directory")
|
2023-06-16 15:09:08 +00:00
|
|
|
|
2023-07-27 10:03:08 +00:00
|
|
|
cmd.RunE = func(cmd *cobra.Command, args []string) error {
|
2023-06-16 15:09:08 +00:00
|
|
|
ctx := cmd.Context()
|
|
|
|
|
|
|
|
// Get source filer and source path without scheme
|
|
|
|
fullSourcePath := args[0]
|
|
|
|
sourceFiler, sourcePath, err := filerForPath(ctx, fullSourcePath)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Get target filer and target path without scheme
|
|
|
|
fullTargetPath := args[1]
|
|
|
|
targetFiler, targetPath, err := filerForPath(ctx, fullTargetPath)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2023-07-27 10:03:08 +00:00
|
|
|
c.sourceScheme = ""
|
2023-06-27 12:42:27 +00:00
|
|
|
if isDbfsPath(fullSourcePath) {
|
2023-07-27 10:03:08 +00:00
|
|
|
c.sourceScheme = "dbfs"
|
2023-06-27 12:42:27 +00:00
|
|
|
}
|
2023-07-27 10:03:08 +00:00
|
|
|
c.targetScheme = ""
|
2023-06-27 12:42:27 +00:00
|
|
|
if isDbfsPath(fullTargetPath) {
|
2023-07-27 10:03:08 +00:00
|
|
|
c.targetScheme = "dbfs"
|
2023-06-27 12:42:27 +00:00
|
|
|
}
|
|
|
|
|
2023-07-27 10:03:08 +00:00
|
|
|
c.ctx = ctx
|
|
|
|
c.sourceFiler = sourceFiler
|
|
|
|
c.targetFiler = targetFiler
|
2023-06-16 15:09:08 +00:00
|
|
|
|
|
|
|
// Get information about file at source path
|
|
|
|
sourceInfo, err := sourceFiler.Stat(ctx, sourcePath)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// case 1: source path is a directory, then recursively create files at target path
|
|
|
|
if sourceInfo.IsDir() {
|
|
|
|
return c.cpDirToDir(sourcePath, targetPath)
|
|
|
|
}
|
|
|
|
|
|
|
|
// case 2: source path is a file, and target path is a directory. In this case
|
|
|
|
// we copy the file to inside the directory
|
|
|
|
if targetInfo, err := targetFiler.Stat(ctx, targetPath); err == nil && targetInfo.IsDir() {
|
|
|
|
return c.cpFileToDir(sourcePath, targetPath)
|
|
|
|
}
|
|
|
|
|
|
|
|
// case 3: source path is a file, and target path is a file
|
|
|
|
return c.cpFileToFile(sourcePath, targetPath)
|
2023-07-27 10:03:08 +00:00
|
|
|
}
|
2023-06-16 15:09:08 +00:00
|
|
|
|
2023-07-27 10:03:08 +00:00
|
|
|
return cmd
|
2023-06-16 15:09:08 +00:00
|
|
|
}
|