From ae10419eb823da8086a4b1770458145a42a16c21 Mon Sep 17 00:00:00 2001
From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com>
Date: Tue, 6 Jun 2023 01:16:23 +0200
Subject: [PATCH] Add fs cat command for dbfs files (#430)

## Changes
Adds a new `fs cat` command that prints the contents of a DBFS file to stdout, backed by the DBFS filer client and a new `cmdio.RenderReader` helper.

## Tests
Tested manually and with integration tests.
---
 cmd/fs/cat.go           | 41 ++++++++++++++++++++++++
 internal/fs_cat_test.go | 70 +++++++++++++++++++++++++++++++++++++++++
 libs/cmdio/io.go        | 13 ++++++++
 3 files changed, 124 insertions(+)
 create mode 100644 cmd/fs/cat.go
 create mode 100644 internal/fs_cat_test.go

diff --git a/cmd/fs/cat.go b/cmd/fs/cat.go
new file mode 100644
index 00000000..01f28d38
--- /dev/null
+++ b/cmd/fs/cat.go
@@ -0,0 +1,41 @@
+package fs
+
+import (
+    "github.com/databricks/cli/cmd/root"
+    "github.com/databricks/cli/libs/cmdio"
+    "github.com/databricks/cli/libs/filer"
+    "github.com/spf13/cobra"
+)
+
+var catCmd = &cobra.Command{
+    Use:     "cat FILE_PATH",
+    Short:   "Show file content",
+    Long:    `Show the contents of a file.`,
+    Args:    cobra.ExactArgs(1),
+    PreRunE: root.MustWorkspaceClient,
+
+    RunE: func(cmd *cobra.Command, args []string) error {
+        ctx := cmd.Context()
+        w := root.WorkspaceClient(ctx)
+
+        path, err := resolveDbfsPath(args[0])
+        if err != nil {
+            return err
+        }
+
+        f, err := filer.NewDbfsClient(w, "/")
+        if err != nil {
+            return err
+        }
+
+        r, err := f.Read(ctx, path)
+        if err != nil {
+            return err
+        }
+        return cmdio.RenderReader(ctx, r)
+    },
+}
+
+func init() {
+    fsCmd.AddCommand(catCmd)
+}
diff --git a/internal/fs_cat_test.go b/internal/fs_cat_test.go
new file mode 100644
index 00000000..5d6952f4
--- /dev/null
+++ b/internal/fs_cat_test.go
@@ -0,0 +1,70 @@
+package internal
+
+import (
+    "context"
+    "io/fs"
+    "path"
+    "strings"
+    "testing"
+
+    "github.com/databricks/cli/libs/filer"
+    "github.com/databricks/databricks-sdk-go"
+    "github.com/stretchr/testify/assert"
+    "github.com/stretchr/testify/require"
+)
+
+func TestFsCatForDbfs(t *testing.T) {
+    t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV"))
+
+    ctx := context.Background()
+    w, err := databricks.NewWorkspaceClient()
+    require.NoError(t, err)
+
+    tmpDir := temporaryDbfsDir(t, w)
+
+    f, err := filer.NewDbfsClient(w, tmpDir)
+    require.NoError(t, err)
+
+    err = f.Write(ctx, "a/hello.txt", strings.NewReader("abc"), filer.CreateParentDirectories)
+    require.NoError(t, err)
+
+    stdout, stderr := RequireSuccessfulRun(t, "fs", "cat", "dbfs:"+path.Join(tmpDir, "a", "hello.txt"))
+    assert.Equal(t, "", stderr.String())
+    assert.Equal(t, "abc", stdout.String())
+}
+
+func TestFsCatForDbfsOnNonExistentFile(t *testing.T) {
+    t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV"))
+
+    _, _, err := RequireErrorRun(t, "fs", "cat", "dbfs:/non-existent-file")
+    assert.ErrorIs(t, err, fs.ErrNotExist)
+}
+
+func TestFsCatForDbfsInvalidScheme(t *testing.T) {
+    t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV"))
+
+    _, _, err := RequireErrorRun(t, "fs", "cat", "dab:/non-existent-file")
+    assert.ErrorContains(t, err, "expected dbfs path (with the dbfs:/ prefix): dab:/non-existent-file")
+}
+
+func TestFsCatDoesNotSupportOutputModeJson(t *testing.T) {
+    t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV"))
+
+    ctx := context.Background()
+    w, err := databricks.NewWorkspaceClient()
+    require.NoError(t, err)
+
+    tmpDir := temporaryDbfsDir(t, w)
+
+    f, err := filer.NewDbfsClient(w, tmpDir)
+    require.NoError(t, err)
+
+    err = f.Write(ctx, "hello.txt", strings.NewReader("abc"))
+    require.NoError(t, err)
+
+    _, _, err = RequireErrorRun(t, "fs", "cat", "dbfs:"+path.Join(tmpDir, "hello.txt"), "--output=json")
+    assert.ErrorContains(t, err, "json output not supported")
+}
+
+// TODO: Add test asserting an error when cat is called on a directory. Need this to be
+// fixed in the SDK first (https://github.com/databricks/databricks-sdk-go/issues/414)
diff --git a/libs/cmdio/io.go b/libs/cmdio/io.go
index 1df6f5c1..32637b1d 100644
--- a/libs/cmdio/io.go
+++ b/libs/cmdio/io.go
@@ -87,6 +87,19 @@ func RenderWithTemplate(ctx context.Context, v any, template string) error {
     }
 }
 
+func RenderReader(ctx context.Context, r io.Reader) error {
+    c := fromContext(ctx)
+    switch c.outputFormat {
+    case flags.OutputJSON:
+        return fmt.Errorf("json output not supported")
+    case flags.OutputText:
+        _, err := io.Copy(c.out, r)
+        return err
+    default:
+        return fmt.Errorf("invalid output format: %s", c.outputFormat)
+    }
+}
+
 type tuple struct{ Name, Id string }
 
 func (c *cmdIO) Select(names map[string]string, label string) (id string, err error) {
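
Note (not part of the patch): the read-and-stream flow implemented in `cmd/fs/cat.go` can also be exercised directly against the SDK. The sketch below uses only calls that appear in the diff (`databricks.NewWorkspaceClient`, `filer.NewDbfsClient`, `Filer.Read`, `io.Copy`); the helper name `printDbfsFile`, the `main` wrapper, and the example path are illustrative assumptions, not part of the CLI.

```go
package main

import (
	"context"
	"fmt"
	"io"
	"os"

	"github.com/databricks/cli/libs/filer"
	"github.com/databricks/databricks-sdk-go"
)

// printDbfsFile is an illustrative helper that mirrors the RunE body of the
// new `fs cat` command: build a workspace client, open a DBFS-backed filer
// rooted at "/", read the file, and stream its contents to stdout.
func printDbfsFile(ctx context.Context, path string) error {
	w, err := databricks.NewWorkspaceClient()
	if err != nil {
		return err
	}

	// Same construction as in cmd/fs/cat.go: a filer rooted at the DBFS root,
	// so `path` is the plain DBFS path without the dbfs: scheme.
	f, err := filer.NewDbfsClient(w, "/")
	if err != nil {
		return err
	}

	r, err := f.Read(ctx, path)
	if err != nil {
		return err
	}

	// The command delegates to cmdio.RenderReader, which rejects --output=json;
	// outside the CLI we simply copy the reader to stdout.
	_, err = io.Copy(os.Stdout, r)
	return err
}

func main() {
	// Example path; assumes a file was previously written there.
	if err := printDbfsFile(context.Background(), "/tmp/hello.txt"); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}
```

From the CLI itself the equivalent would be something like `databricks fs cat dbfs:/tmp/hello.txt` (assuming the standard binary name): per `TestFsCatForDbfsInvalidScheme` the `dbfs:/` prefix is required, and per `TestFsCatDoesNotSupportOutputModeJson` the command errors out when `--output=json` is passed.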