From 6ff00122ade09fbfddf2a5ad73ab71488b2c24a0 Mon Sep 17 00:00:00 2001 From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com> Date: Mon, 5 Jun 2023 17:41:30 +0200 Subject: [PATCH] Add fs ls command for dbfs (#429) ## Changes 1. Adds fs ls command 2. Adds ability to define multiple templates ## Tests Manually and integration tests --- cmd/fs/fs.go | 7 ++- cmd/fs/helpers.go | 14 ++++++ cmd/fs/helpers_test.go | 38 +++++++++++++++ cmd/fs/ls.go | 82 +++++++++++++++++++++++++++++--- internal/filer_test.go | 2 +- internal/fs_ls_test.go | 104 +++++++++++++++++++++++++++++++++++++++++ libs/cmdio/io.go | 17 +++---- libs/cmdio/render.go | 4 ++ 8 files changed, 249 insertions(+), 19 deletions(-) create mode 100644 cmd/fs/helpers.go create mode 100644 cmd/fs/helpers_test.go create mode 100644 internal/fs_ls_test.go diff --git a/cmd/fs/fs.go b/cmd/fs/fs.go index 74d725d4..a69c4b62 100644 --- a/cmd/fs/fs.go +++ b/cmd/fs/fs.go @@ -7,10 +7,9 @@ import ( // fsCmd represents the fs command var fsCmd = &cobra.Command{ - Use: "fs", - Short: "Filesystem related commands", - Long: `Commands to do DBFS operations.`, - Hidden: true, + Use: "fs", + Short: "Filesystem related commands", + Long: `Commands to do DBFS operations.`, } func init() { diff --git a/cmd/fs/helpers.go b/cmd/fs/helpers.go new file mode 100644 index 00000000..e456bff9 --- /dev/null +++ b/cmd/fs/helpers.go @@ -0,0 +1,14 @@ +package fs + +import ( + "fmt" + "strings" +) + +func resolveDbfsPath(path string) (string, error) { + if !strings.HasPrefix(path, "dbfs:/") { + return "", fmt.Errorf("expected dbfs path (with the dbfs:/ prefix): %s", path) + } + + return strings.TrimPrefix(path, "dbfs:"), nil +} diff --git a/cmd/fs/helpers_test.go b/cmd/fs/helpers_test.go new file mode 100644 index 00000000..1d174ef9 --- /dev/null +++ b/cmd/fs/helpers_test.go @@ -0,0 +1,38 @@ +package fs + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestResolveDbfsPath(t *testing.T) { + path, err := resolveDbfsPath("dbfs:/") + assert.NoError(t, err) + assert.Equal(t, "/", path) + + path, err = resolveDbfsPath("dbfs:/abc") + assert.NoError(t, err) + assert.Equal(t, "/abc", path) + + path, err = resolveDbfsPath("dbfs:/a/b/c") + assert.NoError(t, err) + assert.Equal(t, "/a/b/c", path) + + path, err = resolveDbfsPath("dbfs:/a/b/.") + assert.NoError(t, err) + assert.Equal(t, "/a/b/.", path) + + path, err = resolveDbfsPath("dbfs:/a/../c") + assert.NoError(t, err) + assert.Equal(t, "/a/../c", path) + + _, err = resolveDbfsPath("dbf:/a/b/c") + assert.ErrorContains(t, err, "expected dbfs path (with the dbfs:/ prefix): dbf:/a/b/c") + + _, err = resolveDbfsPath("/a/b/c") + assert.ErrorContains(t, err, "expected dbfs path (with the dbfs:/ prefix): /a/b/c") + + _, err = resolveDbfsPath("dbfs:a/b/c") + assert.ErrorContains(t, err, "expected dbfs path (with the dbfs:/ prefix): dbfs:a/b/c") +} diff --git a/cmd/fs/ls.go b/cmd/fs/ls.go index ac192385..200cbed5 100644 --- a/cmd/fs/ls.go +++ b/cmd/fs/ls.go @@ -1,23 +1,93 @@ package fs import ( - "fmt" + "io/fs" + "sort" + "time" + "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/filer" "github.com/spf13/cobra" ) +type jsonDirEntry struct { + Name string `json:"name"` + IsDir bool `json:"is_directory"` + Size int64 `json:"size"` + ModTime time.Time `json:"last_modified"` +} + +func toJsonDirEntry(f fs.DirEntry) (*jsonDirEntry, error) { + info, err := f.Info() + if err != nil { + return nil, err + } + + return &jsonDirEntry{ + Name: f.Name(), + IsDir: f.IsDir(), + Size: info.Size(), + ModTime: info.ModTime(), + }, nil +} + // lsCmd represents the ls command var lsCmd = &cobra.Command{ - Use: "ls ", - Short: "Lists files", - Long: `Lists files`, - Hidden: true, + Use: "ls DIR_PATH", + Short: "Lists files", + Long: `Lists files`, + Args: cobra.ExactArgs(1), + PreRunE: root.MustWorkspaceClient, RunE: func(cmd *cobra.Command, args []string) error { - return fmt.Errorf("TODO") + ctx := cmd.Context() + w := root.WorkspaceClient(ctx) + + path, err := resolveDbfsPath(args[0]) + if err != nil { + return err + } + + f, err := filer.NewDbfsClient(w, "/") + if err != nil { + return err + } + + entries, err := f.ReadDir(ctx, path) + if err != nil { + return err + } + + jsonDirEntries := make([]jsonDirEntry, len(entries)) + for i, entry := range entries { + jsonDirEntry, err := toJsonDirEntry(entry) + if err != nil { + return err + } + jsonDirEntries[i] = *jsonDirEntry + } + sort.Slice(jsonDirEntries, func(i, j int) bool { + return jsonDirEntries[i].Name < jsonDirEntries[j].Name + }) + + // Use template for long mode if the flag is set + if longMode { + return cmdio.RenderWithTemplate(ctx, jsonDirEntries, cmdio.Heredoc(` + {{range .}}{{if .IsDir}}DIRECTORY {{else}}FILE {{end}}{{.Size}} {{.ModTime|pretty_date}} {{.Name}} + {{end}} + `)) + } + return cmdio.RenderWithTemplate(ctx, jsonDirEntries, cmdio.Heredoc(` + {{range .}}{{.Name}} + {{end}} + `)) }, } +var longMode bool + func init() { + lsCmd.Flags().BoolVarP(&longMode, "long", "l", false, "Displays full information including size, file type and modification time since Epoch in milliseconds.") fsCmd.AddCommand(lsCmd) } diff --git a/internal/filer_test.go b/internal/filer_test.go index 5037f784..81c3e4ae 100644 --- a/internal/filer_test.go +++ b/internal/filer_test.go @@ -241,7 +241,7 @@ func TestAccFilerWorkspaceFilesReadDir(t *testing.T) { func temporaryDbfsDir(t *testing.T, w *databricks.WorkspaceClient) string { ctx := context.Background() - path := fmt.Sprintf("/tmp/%s", RandomName("integration-test-filer-dbfs-")) + path := fmt.Sprintf("/tmp/%s", RandomName("integration-test-dbfs-")) // This call fails if the path already exists. t.Logf("mkdir dbfs:%s", path) diff --git a/internal/fs_ls_test.go b/internal/fs_ls_test.go new file mode 100644 index 00000000..060c706d --- /dev/null +++ b/internal/fs_ls_test.go @@ -0,0 +1,104 @@ +package internal + +import ( + "context" + "encoding/json" + "io/fs" + "path" + "regexp" + "strings" + "testing" + + _ "github.com/databricks/cli/cmd/fs" + "github.com/databricks/cli/libs/filer" + "github.com/databricks/databricks-sdk-go" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestFsLsForDbfs(t *testing.T) { + t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) + + ctx := context.Background() + w, err := databricks.NewWorkspaceClient() + require.NoError(t, err) + + tmpDir := temporaryDbfsDir(t, w) + + f, err := filer.NewDbfsClient(w, tmpDir) + require.NoError(t, err) + + err = f.Mkdir(ctx, "a") + require.NoError(t, err) + err = f.Write(ctx, "a/hello.txt", strings.NewReader("abc"), filer.CreateParentDirectories) + require.NoError(t, err) + err = f.Write(ctx, "bye.txt", strings.NewReader("def")) + require.NoError(t, err) + + stdout, stderr := RequireSuccessfulRun(t, "fs", "ls", "dbfs:"+tmpDir, "--output=json") + assert.Equal(t, "", stderr.String()) + var parsedStdout []map[string]any + err = json.Unmarshal(stdout.Bytes(), &parsedStdout) + require.NoError(t, err) + + // assert on ls output + assert.Equal(t, "a", parsedStdout[0]["name"]) + assert.Equal(t, true, parsedStdout[0]["is_directory"]) + assert.Equal(t, float64(0), parsedStdout[0]["size"]) + assert.Equal(t, "bye.txt", parsedStdout[1]["name"]) + assert.Equal(t, false, parsedStdout[1]["is_directory"]) + assert.Equal(t, float64(3), parsedStdout[1]["size"]) +} + +func TestFsLsForDbfsOnFile(t *testing.T) { + t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) + + ctx := context.Background() + w, err := databricks.NewWorkspaceClient() + require.NoError(t, err) + + tmpDir := temporaryDbfsDir(t, w) + + f, err := filer.NewDbfsClient(w, tmpDir) + require.NoError(t, err) + + err = f.Mkdir(ctx, "a") + require.NoError(t, err) + err = f.Write(ctx, "a/hello.txt", strings.NewReader("abc"), filer.CreateParentDirectories) + require.NoError(t, err) + + _, _, err = RequireErrorRun(t, "fs", "ls", "dbfs:"+path.Join(tmpDir, "a", "hello.txt"), "--output=json") + assert.Regexp(t, regexp.MustCompile("not a directory: .*/a/hello.txt"), err.Error()) +} + +func TestFsLsForDbfsOnEmptyDir(t *testing.T) { + t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) + + w, err := databricks.NewWorkspaceClient() + require.NoError(t, err) + + tmpDir := temporaryDbfsDir(t, w) + + stdout, stderr := RequireSuccessfulRun(t, "fs", "ls", "dbfs:"+tmpDir, "--output=json") + assert.Equal(t, "", stderr.String()) + var parsedStdout []map[string]any + err = json.Unmarshal(stdout.Bytes(), &parsedStdout) + require.NoError(t, err) + + // assert on ls output + assert.Equal(t, 0, len(parsedStdout)) +} + +func TestFsLsForDbfsForNonexistingDir(t *testing.T) { + t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) + + _, _, err := RequireErrorRun(t, "fs", "ls", "dbfs:/john-cena", "--output=json") + assert.ErrorIs(t, err, fs.ErrNotExist) +} + +func TestFsLsWithoutScheme(t *testing.T) { + t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) + + _, _, err := RequireErrorRun(t, "fs", "ls", "/ray-mysterio", "--output=json") + assert.ErrorContains(t, err, "expected dbfs path (with the dbfs:/ prefix): /ray-mysterio") +} diff --git a/libs/cmdio/io.go b/libs/cmdio/io.go index beaa8571..1df6f5c1 100644 --- a/libs/cmdio/io.go +++ b/libs/cmdio/io.go @@ -66,14 +66,20 @@ func (c *cmdIO) IsTTY() bool { return isatty.IsTerminal(fd) || isatty.IsCygwinTerminal(fd) } -func (c *cmdIO) Render(v any) error { +func Render(ctx context.Context, v any) error { + c := fromContext(ctx) + return RenderWithTemplate(ctx, v, c.template) +} + +func RenderWithTemplate(ctx context.Context, v any, template string) error { // TODO: add terminal width & white/dark theme detection + c := fromContext(ctx) switch c.outputFormat { case flags.OutputJSON: return renderJson(c.out, v) case flags.OutputText: - if c.template != "" { - return renderTemplate(c.out, c.template, v) + if template != "" { + return renderTemplate(c.out, template, v) } return renderJson(c.out, v) default: @@ -81,11 +87,6 @@ func (c *cmdIO) Render(v any) error { } } -func Render(ctx context.Context, v any) error { - c := fromContext(ctx) - return c.Render(v) -} - type tuple struct{ Name, Id string } func (c *cmdIO) Select(names map[string]string, label string) (id string, err error) { diff --git a/libs/cmdio/render.go b/libs/cmdio/render.go index 4ffd563f..c8f2b0df 100644 --- a/libs/cmdio/render.go +++ b/libs/cmdio/render.go @@ -6,6 +6,7 @@ import ( "strings" "text/tabwriter" "text/template" + "time" "github.com/fatih/color" "github.com/nwidger/jsoncolor" @@ -85,6 +86,9 @@ func renderTemplate(w io.Writer, tmpl string, v any) error { } return string(b), nil }, + "pretty_date": func(t time.Time) string { + return t.Format("2006-01-02T15:04:05Z") + }, }).Parse(tmpl) if err != nil { return err