Add fs ls command for dbfs (#429)

## Changes
1. Adds fs ls command
2. Adds ability to define multiple templates

## Tests
Tested manually and with integration tests.
This commit is contained in:
shreyas-goenka 2023-06-05 17:41:30 +02:00 committed by GitHub
parent 1f130f3722
commit 6ff00122ad
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 249 additions and 19 deletions

View File

@ -10,7 +10,6 @@ var fsCmd = &cobra.Command{
Use: "fs",
Short: "Filesystem related commands",
Long: `Commands to do DBFS operations.`,
Hidden: true,
}
func init() {

14
cmd/fs/helpers.go Normal file
View File

@ -0,0 +1,14 @@
package fs
import (
"fmt"
"strings"
)
// resolveDbfsPath checks that path starts with the "dbfs:/" prefix and returns
// the remainder of the path with only the "dbfs:" scheme removed, so the result
// always begins with "/". The path is not cleaned or normalized in any way.
//
// An error quoting the offending input is returned when the prefix is missing.
func resolveDbfsPath(path string) (string, error) {
	const scheme = "dbfs:"

	// Require the slash after the scheme too: "dbfs:a/b" is rejected.
	if strings.HasPrefix(path, scheme+"/") {
		return path[len(scheme):], nil
	}
	return "", fmt.Errorf("expected dbfs path (with the dbfs:/ prefix): %s", path)
}

38
cmd/fs/helpers_test.go Normal file
View File

@ -0,0 +1,38 @@
package fs
import (
"testing"
"github.com/stretchr/testify/assert"
)
// TestResolveDbfsPath checks that resolveDbfsPath strips the "dbfs:" scheme from
// well-formed inputs without otherwise altering them, and that inputs lacking
// the "dbfs:/" prefix are rejected with an error that echoes the input.
func TestResolveDbfsPath(t *testing.T) {
	// Inputs carrying the dbfs:/ prefix, paired with the expected resolved path.
	accepted := []struct {
		input    string
		resolved string
	}{
		{"dbfs:/", "/"},
		{"dbfs:/abc", "/abc"},
		{"dbfs:/a/b/c", "/a/b/c"},
		{"dbfs:/a/b/.", "/a/b/."},
		{"dbfs:/a/../c", "/a/../c"},
	}
	for _, tc := range accepted {
		got, err := resolveDbfsPath(tc.input)
		assert.NoError(t, err)
		assert.Equal(t, tc.resolved, got)
	}

	// Inputs without the dbfs:/ prefix, paired with the expected error text.
	rejected := []struct {
		input   string
		message string
	}{
		{"dbf:/a/b/c", "expected dbfs path (with the dbfs:/ prefix): dbf:/a/b/c"},
		{"/a/b/c", "expected dbfs path (with the dbfs:/ prefix): /a/b/c"},
		{"dbfs:a/b/c", "expected dbfs path (with the dbfs:/ prefix): dbfs:a/b/c"},
	}
	for _, tc := range rejected {
		_, err := resolveDbfsPath(tc.input)
		assert.ErrorContains(t, err, tc.message)
	}
}

View File

@ -1,23 +1,93 @@
package fs
import (
"fmt"
"io/fs"
"sort"
"time"
"github.com/databricks/cli/cmd/root"
"github.com/databricks/cli/libs/cmdio"
"github.com/databricks/cli/libs/filer"
"github.com/spf13/cobra"
)
// jsonDirEntry is the serializable form of a single directory entry produced by
// the ls command. The struct tags define the keys used in JSON output.
type jsonDirEntry struct {
// Name is the entry's name; serialized as "name".
Name string `json:"name"`
// IsDir reports whether the entry is a directory; serialized as "is_directory".
IsDir bool `json:"is_directory"`
// Size is the size reported by the entry's file info; serialized as "size".
Size int64 `json:"size"`
// ModTime is the modification time reported by the entry's file info;
// serialized as "last_modified".
ModTime time.Time `json:"last_modified"`
}
// toJsonDirEntry builds a jsonDirEntry from f. The name and directory flag are
// taken from f itself, while the size and modification time come from f.Info().
//
// If f.Info() fails, its error is returned unchanged and the result is nil.
func toJsonDirEntry(f fs.DirEntry) (*jsonDirEntry, error) {
	stat, statErr := f.Info()
	if statErr != nil {
		return nil, statErr
	}

	entry := new(jsonDirEntry)
	entry.Name = f.Name()
	entry.IsDir = f.IsDir()
	entry.Size = stat.Size()
	entry.ModTime = stat.ModTime()
	return entry, nil
}
// lsCmd implements the `fs ls` command: it takes exactly one argument, a
// dbfs:/ path, lists the entries of that directory and renders them sorted by
// name.
//
// NOTE(review): this span is a rendered diff without +/- markers, so removed
// and added lines appear side by side. Presumably the first `Use` line,
// `Hidden: true` and `return fmt.Errorf("TODO")` are the removed lines —
// confirm against the repository before treating this as compilable code.
var lsCmd = &cobra.Command{
Use: "ls <dir-name>",
Use: "ls DIR_PATH",
Short: "Lists files",
Long: `Lists files`,
Hidden: true,
Args: cobra.ExactArgs(1),
PreRunE: root.MustWorkspaceClient,
RunE: func(cmd *cobra.Command, args []string) error {
return fmt.Errorf("TODO")
ctx := cmd.Context()
w := root.WorkspaceClient(ctx)
// Reject arguments without the dbfs:/ prefix and strip the "dbfs:" scheme.
path, err := resolveDbfsPath(args[0])
if err != nil {
return err
}
// The filer is created with "/" as its root; the resolved path is then
// handed to ReadDir as-is.
f, err := filer.NewDbfsClient(w, "/")
if err != nil {
return err
}
entries, err := f.ReadDir(ctx, path)
if err != nil {
return err
}
// Convert every entry to its serializable form. The first entry whose
// conversion fails aborts the command with that error.
jsonDirEntries := make([]jsonDirEntry, len(entries))
for i, entry := range entries {
jsonDirEntry, err := toJsonDirEntry(entry)
if err != nil {
return err
}
jsonDirEntries[i] = *jsonDirEntry
}
// Order the listing by entry name using plain string comparison.
sort.Slice(jsonDirEntries, func(i, j int) bool {
return jsonDirEntries[i].Name < jsonDirEntries[j].Name
})
// Use template for long mode if the flag is set
// Long mode prints the entry kind (DIRECTORY or FILE), size, modification
// time (via the pretty_date template function) and name on each line.
if longMode {
return cmdio.RenderWithTemplate(ctx, jsonDirEntries, cmdio.Heredoc(`
{{range .}}{{if .IsDir}}DIRECTORY {{else}}FILE {{end}}{{.Size}} {{.ModTime|pretty_date}} {{.Name}}
{{end}}
`))
}
// Default mode prints one entry name per line.
return cmdio.RenderWithTemplate(ctx, jsonDirEntries, cmdio.Heredoc(`
{{range .}}{{.Name}}
{{end}}
`))
},
}
// longMode is bound to the --long/-l flag of the ls command. When set, the
// command renders its listing with the long-form template (entry kind, size,
// modification time and name) instead of names only.
var longMode bool

// init registers the --long flag on lsCmd and attaches lsCmd to fsCmd.
func init() {
	// The usage text describes the modification time generically: the long-form
	// template renders it through the pretty_date template function as a
	// formatted timestamp, not as milliseconds since the Epoch.
	lsCmd.Flags().BoolVarP(&longMode, "long", "l", false, "Displays full information including size, file type and modification time.")
	fsCmd.AddCommand(lsCmd)
}

View File

@ -241,7 +241,7 @@ func TestAccFilerWorkspaceFilesReadDir(t *testing.T) {
func temporaryDbfsDir(t *testing.T, w *databricks.WorkspaceClient) string {
ctx := context.Background()
path := fmt.Sprintf("/tmp/%s", RandomName("integration-test-filer-dbfs-"))
path := fmt.Sprintf("/tmp/%s", RandomName("integration-test-dbfs-"))
// This call fails if the path already exists.
t.Logf("mkdir dbfs:%s", path)

104
internal/fs_ls_test.go Normal file
View File

@ -0,0 +1,104 @@
package internal
import (
"context"
"encoding/json"
"io/fs"
"path"
"regexp"
"strings"
"testing"
_ "github.com/databricks/cli/cmd/fs"
"github.com/databricks/cli/libs/filer"
"github.com/databricks/databricks-sdk-go"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestFsLsForDbfs creates a temporary DBFS directory containing a
// subdirectory "a" (with a file inside it) and a file "bye.txt", runs
// `fs ls` on it with JSON output, and checks the name, directory flag and size
// reported for the two top-level entries.
//
// The test is skipped unless CLOUD_ENV is set.
func TestFsLsForDbfs(t *testing.T) {
	t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV"))

	ctx := context.Background()
	w, err := databricks.NewWorkspaceClient()
	require.NoError(t, err)

	tmpDir := temporaryDbfsDir(t, w)

	f, err := filer.NewDbfsClient(w, tmpDir)
	require.NoError(t, err)

	// Populate the directory: a/hello.txt (3 bytes) and bye.txt (3 bytes).
	err = f.Mkdir(ctx, "a")
	require.NoError(t, err)
	err = f.Write(ctx, "a/hello.txt", strings.NewReader("abc"), filer.CreateParentDirectories)
	require.NoError(t, err)
	err = f.Write(ctx, "bye.txt", strings.NewReader("def"))
	require.NoError(t, err)

	stdout, stderr := RequireSuccessfulRun(t, "fs", "ls", "dbfs:"+tmpDir, "--output=json")
	assert.Equal(t, "", stderr.String())

	var parsedStdout []map[string]any
	err = json.Unmarshal(stdout.Bytes(), &parsedStdout)
	require.NoError(t, err)

	// Stop here with a clear failure if the listing does not have exactly the
	// two expected entries; indexing below would otherwise panic.
	require.Len(t, parsedStdout, 2)

	// assert on ls output
	// JSON numbers decode into float64 when the target is map[string]any.
	assert.Equal(t, "a", parsedStdout[0]["name"])
	assert.Equal(t, true, parsedStdout[0]["is_directory"])
	assert.Equal(t, float64(0), parsedStdout[0]["size"])

	assert.Equal(t, "bye.txt", parsedStdout[1]["name"])
	assert.Equal(t, false, parsedStdout[1]["is_directory"])
	assert.Equal(t, float64(3), parsedStdout[1]["size"])
}
// TestFsLsForDbfsOnFile checks that running `fs ls` on a path that refers to a
// file fails with a "not a directory" error naming that file.
//
// The test is skipped unless CLOUD_ENV is set.
func TestFsLsForDbfsOnFile(t *testing.T) {
	t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV"))

	ctx := context.Background()
	w, err := databricks.NewWorkspaceClient()
	require.NoError(t, err)

	tmpDir := temporaryDbfsDir(t, w)

	f, err := filer.NewDbfsClient(w, tmpDir)
	require.NoError(t, err)

	err = f.Mkdir(ctx, "a")
	require.NoError(t, err)
	err = f.Write(ctx, "a/hello.txt", strings.NewReader("abc"), filer.CreateParentDirectories)
	require.NoError(t, err)

	_, _, err = RequireErrorRun(t, "fs", "ls", "dbfs:"+path.Join(tmpDir, "a", "hello.txt"), "--output=json")

	// The dot is escaped so the pattern matches the literal file name only.
	assert.Regexp(t, regexp.MustCompile(`not a directory: .*/a/hello\.txt`), err.Error())
}
// TestFsLsForDbfsOnEmptyDir runs `fs ls` with JSON output on a freshly created
// temporary DBFS directory and expects nothing on stderr and a JSON array with
// no elements on stdout.
//
// The test is skipped unless CLOUD_ENV is set.
func TestFsLsForDbfsOnEmptyDir(t *testing.T) {
	t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV"))

	client, err := databricks.NewWorkspaceClient()
	require.NoError(t, err)

	emptyDir := temporaryDbfsDir(t, client)

	stdout, stderr := RequireSuccessfulRun(t, "fs", "ls", "dbfs:"+emptyDir, "--output=json")
	assert.Equal(t, "", stderr.String())

	var listing []map[string]any
	require.NoError(t, json.Unmarshal(stdout.Bytes(), &listing))

	// assert on ls output
	assert.Equal(t, 0, len(listing))
}
// TestFsLsForDbfsForNonexistingDir expects `fs ls` on the path
// dbfs:/john-cena to fail with an error that matches fs.ErrNotExist.
//
// The test is skipped unless CLOUD_ENV is set.
func TestFsLsForDbfsForNonexistingDir(t *testing.T) {
	t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV"))

	const missingDir = "dbfs:/john-cena"
	_, _, runErr := RequireErrorRun(t, "fs", "ls", missingDir, "--output=json")
	assert.ErrorIs(t, runErr, fs.ErrNotExist)
}
// TestFsLsWithoutScheme expects `fs ls` to reject a path that lacks the dbfs:/
// prefix with an error that quotes the offending path.
//
// The test is skipped unless CLOUD_ENV is set.
func TestFsLsWithoutScheme(t *testing.T) {
	t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV"))

	const schemelessPath = "/ray-mysterio"
	_, _, runErr := RequireErrorRun(t, "fs", "ls", schemelessPath, "--output=json")
	assert.ErrorContains(t, runErr, "expected dbfs path (with the dbfs:/ prefix): /ray-mysterio")
}

View File

@ -66,14 +66,20 @@ func (c *cmdIO) IsTTY() bool {
return isatty.IsTerminal(fd) || isatty.IsCygwinTerminal(fd)
}
func (c *cmdIO) Render(v any) error {
func Render(ctx context.Context, v any) error {
c := fromContext(ctx)
return RenderWithTemplate(ctx, v, c.template)
}
func RenderWithTemplate(ctx context.Context, v any, template string) error {
// TODO: add terminal width & white/dark theme detection
c := fromContext(ctx)
switch c.outputFormat {
case flags.OutputJSON:
return renderJson(c.out, v)
case flags.OutputText:
if c.template != "" {
return renderTemplate(c.out, c.template, v)
if template != "" {
return renderTemplate(c.out, template, v)
}
return renderJson(c.out, v)
default:
@ -81,11 +87,6 @@ func (c *cmdIO) Render(v any) error {
}
}
func Render(ctx context.Context, v any) error {
c := fromContext(ctx)
return c.Render(v)
}
type tuple struct{ Name, Id string }
func (c *cmdIO) Select(names map[string]string, label string) (id string, err error) {

View File

@ -6,6 +6,7 @@ import (
"strings"
"text/tabwriter"
"text/template"
"time"
"github.com/fatih/color"
"github.com/nwidger/jsoncolor"
@ -85,6 +86,9 @@ func renderTemplate(w io.Writer, tmpl string, v any) error {
}
return string(b), nil
},
"pretty_date": func(t time.Time) string {
return t.Format("2006-01-02T15:04:05Z")
},
}).Parse(tmpl)
if err != nil {
return err