diff --git a/CHANGELOG.md b/CHANGELOG.md index e7675f7c..2eae4ac1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,32 @@ # Version changelog +## 0.200.1 + +CLI: +* Add --absolute flag for ls command ([#508](https://github.com/databricks/cli/pull/508)). +* Add dbfs scheme prefix to paths in cp command output ([#516](https://github.com/databricks/cli/pull/516)). +* Add provider detection to the repos create command ([#528](https://github.com/databricks/cli/pull/528)). +* Added configure-cluster flag for auth login ([#500](https://github.com/databricks/cli/pull/500)). +* Added prompts for Databricks profile for auth login command ([#502](https://github.com/databricks/cli/pull/502)). +* Allow specifying repo by path for repos commands ([#526](https://github.com/databricks/cli/pull/526)). +* Decode contents by default in workspace export command ([#531](https://github.com/databricks/cli/pull/531)). +* Fixed jobs create command to only accept JSON payload ([#498](https://github.com/databricks/cli/pull/498)). +* Make local files default for fs commands ([#506](https://github.com/databricks/cli/pull/506)). +* Remove \r from new line print statments ([#509](https://github.com/databricks/cli/pull/509)). +* Remove extra call to filer.Stat in dbfs filer.Read ([#515](https://github.com/databricks/cli/pull/515)). +* Update alerts command integration test ([#512](https://github.com/databricks/cli/pull/512)). +* Update variable regex to support hyphens ([#503](https://github.com/databricks/cli/pull/503)). + +Bundles: +* Add DATABRICKS_BUNDLE_TMP env variable ([#462](https://github.com/databricks/cli/pull/462)). +* Update Terraform provider schema structs ([#504](https://github.com/databricks/cli/pull/504)). + +Dependencies: +* Bump github.com/databricks/databricks-sdk-go from 0.9.1-0.20230614092458-b5bbc1c8dabb to 0.10.0 ([#497](https://github.com/databricks/cli/pull/497)). 
+ +Internal: +* Use direct download for workspace filer read ([#514](https://github.com/databricks/cli/pull/514)). + ## 0.200.0 This version marks the first version available as public preview. diff --git a/bundle/bundle.go b/bundle/bundle.go index 1ee6dfb6..02d0eaac 100644 --- a/bundle/bundle.go +++ b/bundle/bundle.go @@ -1,4 +1,4 @@ -// Package bundle is the top level package for Databricks Application Bundles. +// Package bundle is the top level package for Databricks Asset Bundles. // // A bundle is represented by the [Bundle] type. It consists of configuration // and runtime state, such as a client to a Databricks workspace. diff --git a/cmd/bundle/root.go b/cmd/bundle/root.go index a96c0dc1..395ed383 100644 --- a/cmd/bundle/root.go +++ b/cmd/bundle/root.go @@ -8,7 +8,7 @@ import ( // rootCmd represents the root command for the bundle subcommand. var rootCmd = &cobra.Command{ Use: "bundle", - Short: "Databricks Application Bundles", + Short: "Databricks Asset Bundles", } func AddCommand(cmd *cobra.Command) { diff --git a/cmd/fs/cp.go b/cmd/fs/cp.go index 7d9ff24a..399a1aea 100644 --- a/cmd/fs/cp.go +++ b/cmd/fs/cp.go @@ -15,9 +15,11 @@ import ( ) type copy struct { - ctx context.Context - sourceFiler filer.Filer - targetFiler filer.Filer + ctx context.Context + sourceFiler filer.Filer + targetFiler filer.Filer + sourceScheme string + targetScheme string } func (c *copy) cpWriteCallback(sourceDir, targetDir string) fs.WalkDirFunc { @@ -78,29 +80,47 @@ func (c *copy) cpFileToFile(sourcePath, targetPath string) error { err = c.targetFiler.Write(c.ctx, targetPath, r) // skip if file already exists if err != nil && errors.Is(err, fs.ErrExist) { - return emitCpFileSkippedEvent(c.ctx, sourcePath, targetPath) + return c.emitFileSkippedEvent(sourcePath, targetPath) } if err != nil { return err } } - return emitCpFileCopiedEvent(c.ctx, sourcePath, targetPath) + return c.emitFileCopiedEvent(sourcePath, targetPath) } // TODO: emit these events on stderr // TODO: add 
integration tests for these events -func emitCpFileSkippedEvent(ctx context.Context, sourcePath, targetPath string) error { - event := newFileSkippedEvent(sourcePath, targetPath) +func (c *copy) emitFileSkippedEvent(sourcePath, targetPath string) error { + fullSourcePath := sourcePath + if c.sourceScheme != "" { + fullSourcePath = path.Join(c.sourceScheme+":", sourcePath) + } + fullTargetPath := targetPath + if c.targetScheme != "" { + fullTargetPath = path.Join(c.targetScheme+":", targetPath) + } + + event := newFileSkippedEvent(fullSourcePath, fullTargetPath) template := "{{.SourcePath}} -> {{.TargetPath}} (skipped; already exists)\n" - return cmdio.RenderWithTemplate(ctx, event, template) + return cmdio.RenderWithTemplate(c.ctx, event, template) } -func emitCpFileCopiedEvent(ctx context.Context, sourcePath, targetPath string) error { - event := newFileCopiedEvent(sourcePath, targetPath) +func (c *copy) emitFileCopiedEvent(sourcePath, targetPath string) error { + fullSourcePath := sourcePath + if c.sourceScheme != "" { + fullSourcePath = path.Join(c.sourceScheme+":", sourcePath) + } + fullTargetPath := targetPath + if c.targetScheme != "" { + fullTargetPath = path.Join(c.targetScheme+":", targetPath) + } + + event := newFileCopiedEvent(fullSourcePath, fullTargetPath) template := "{{.SourcePath}} -> {{.TargetPath}}\n" - return cmdio.RenderWithTemplate(ctx, event, template) + return cmdio.RenderWithTemplate(c.ctx, event, template) } var cpOverwrite bool @@ -144,10 +164,21 @@ var cpCmd = &cobra.Command{ return err } + sourceScheme := "" + if isDbfsPath(fullSourcePath) { + sourceScheme = "dbfs" + } + targetScheme := "" + if isDbfsPath(fullTargetPath) { + targetScheme = "dbfs" + } + c := copy{ - ctx: ctx, - sourceFiler: sourceFiler, - targetFiler: targetFiler, + ctx: ctx, + sourceFiler: sourceFiler, + targetFiler: targetFiler, + sourceScheme: sourceScheme, + targetScheme: targetScheme, } // Get information about file at source path diff --git a/cmd/fs/helpers.go 
b/cmd/fs/helpers.go index aecee9e0..43d65b5d 100644 --- a/cmd/fs/helpers.go +++ b/cmd/fs/helpers.go @@ -10,47 +10,42 @@ import ( "github.com/databricks/cli/libs/filer" ) -type Scheme string - -const ( - DbfsScheme = Scheme("dbfs") - LocalScheme = Scheme("file") - NoScheme = Scheme("") -) - func filerForPath(ctx context.Context, fullPath string) (filer.Filer, string, error) { - parts := strings.SplitN(fullPath, ":/", 2) + // Split path at : to detect any file schemes + parts := strings.SplitN(fullPath, ":", 2) + + // If no scheme is specified, then local path if len(parts) < 2 { - return nil, "", fmt.Errorf(`no scheme specified for path %s. Please specify scheme "dbfs" or "file". Example: file:/foo/bar or file:/c:/foo/bar`, fullPath) + f, err := filer.NewLocalClient("") + return f, fullPath, err } - scheme := Scheme(parts[0]) + + // On windows systems, paths start with a drive letter. If the scheme + // is a single letter and the OS is windows, then we conclude the path + // is meant to be a local path. + if runtime.GOOS == "windows" && len(parts[0]) == 1 { + f, err := filer.NewLocalClient("") + return f, fullPath, err + } + + if parts[0] != "dbfs" { + return nil, "", fmt.Errorf("invalid scheme: %s", parts[0]) + } + path := parts[1] - switch scheme { - case DbfsScheme: - w := root.WorkspaceClient(ctx) - // If the specified path has the "Volumes" prefix, use the Files API. 
- if strings.HasPrefix(path, "Volumes/") { - f, err := filer.NewFilesClient(w, "/") - return f, path, err - } - f, err := filer.NewDbfsClient(w, "/") - return f, path, err + w := root.WorkspaceClient(ctx) - case LocalScheme: - if runtime.GOOS == "windows" { - parts := strings.SplitN(path, ":", 2) - if len(parts) < 2 { - return nil, "", fmt.Errorf("no volume specfied for path: %s", path) - } - volume := parts[0] + ":" - relPath := parts[1] - f, err := filer.NewLocalClient(volume) - return f, relPath, err - } - f, err := filer.NewLocalClient("/") + // If the specified path has the "Volumes" prefix, use the Files API. + if strings.HasPrefix(path, "/Volumes/") { + f, err := filer.NewFilesClient(w, "/") return f, path, err - - default: - return nil, "", fmt.Errorf(`unsupported scheme %s specified for path %s. Please specify scheme "dbfs" or "file". Example: file:/foo/bar or file:/c:/foo/bar`, scheme, fullPath) } + + // The file is a dbfs file, and uses the DBFS APIs + f, err := filer.NewDbfsClient(w, "/") + return f, path, err +} + +func isDbfsPath(path string) bool { + return strings.HasPrefix(path, "dbfs:/") } diff --git a/cmd/fs/helpers_test.go b/cmd/fs/helpers_test.go index 4beda6ca..d86bd46e 100644 --- a/cmd/fs/helpers_test.go +++ b/cmd/fs/helpers_test.go @@ -5,22 +5,58 @@ import ( "runtime" "testing" + "github.com/databricks/cli/libs/filer" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) -func TestNotSpecifyingVolumeForWindowsPathErrors(t *testing.T) { +func TestFilerForPathForLocalPaths(t *testing.T) { + tmpDir := t.TempDir() + ctx := context.Background() + + f, path, err := filerForPath(ctx, tmpDir) + assert.NoError(t, err) + assert.Equal(t, tmpDir, path) + + info, err := f.Stat(ctx, path) + require.NoError(t, err) + assert.True(t, info.IsDir()) +} + +func TestFilerForPathForInvalidScheme(t *testing.T) { + ctx := context.Background() + + _, _, err := filerForPath(ctx, "dbf:/a") + assert.ErrorContains(t, err, "invalid scheme") + + 
_, _, err = filerForPath(ctx, "foo:a") + assert.ErrorContains(t, err, "invalid scheme") + + _, _, err = filerForPath(ctx, "file:/a") + assert.ErrorContains(t, err, "invalid scheme") +} + +func testWindowsFilerForPath(t *testing.T, ctx context.Context, fullPath string) { + f, path, err := filerForPath(ctx, fullPath) + assert.NoError(t, err) + + // Assert path remains unchanged + assert.Equal(t, path, fullPath) + + // Assert local client is created + _, ok := f.(*filer.LocalClient) + assert.True(t, ok) +} + +func TestFilerForWindowsLocalPaths(t *testing.T) { if runtime.GOOS != "windows" { - t.Skip() + t.SkipNow() } ctx := context.Background() - pathWithVolume := `file:/c:/foo/bar` - pathWOVolume := `file:/uno/dos` - - _, path, err := filerForPath(ctx, pathWithVolume) - assert.NoError(t, err) - assert.Equal(t, `/foo/bar`, path) - - _, _, err = filerForPath(ctx, pathWOVolume) - assert.Equal(t, "no volume specfied for path: uno/dos", err.Error()) + testWindowsFilerForPath(t, ctx, `c:\abc`) + testWindowsFilerForPath(t, ctx, `c:abc`) + testWindowsFilerForPath(t, ctx, `d:\abc`) + testWindowsFilerForPath(t, ctx, `d:\abc`) + testWindowsFilerForPath(t, ctx, `f:\abc\ef`) } diff --git a/cmd/fs/ls.go b/cmd/fs/ls.go index 1226e937..b06345d5 100644 --- a/cmd/fs/ls.go +++ b/cmd/fs/ls.go @@ -2,6 +2,7 @@ package fs import ( "io/fs" + "path" "sort" "time" @@ -17,14 +18,19 @@ type jsonDirEntry struct { ModTime time.Time `json:"last_modified"` } -func toJsonDirEntry(f fs.DirEntry) (*jsonDirEntry, error) { +func toJsonDirEntry(f fs.DirEntry, baseDir string, isAbsolute bool) (*jsonDirEntry, error) { info, err := f.Info() if err != nil { return nil, err } + name := f.Name() + if isAbsolute { + name = path.Join(baseDir, name) + } + return &jsonDirEntry{ - Name: f.Name(), + Name: name, IsDir: f.IsDir(), Size: info.Size(), ModTime: info.ModTime(), @@ -54,7 +60,7 @@ var lsCmd = &cobra.Command{ jsonDirEntries := make([]jsonDirEntry, len(entries)) for i, entry := range entries { - jsonDirEntry, 
err := toJsonDirEntry(entry) + jsonDirEntry, err := toJsonDirEntry(entry, args[0], lsAbsolute) if err != nil { return err } @@ -79,8 +85,10 @@ var lsCmd = &cobra.Command{ } var longMode bool +var lsAbsolute bool func init() { lsCmd.Flags().BoolVarP(&longMode, "long", "l", false, "Displays full information including size, file type and modification time since Epoch in milliseconds.") + lsCmd.Flags().BoolVar(&lsAbsolute, "absolute", false, "Displays absolute paths.") fsCmd.AddCommand(lsCmd) } diff --git a/cmd/workspace/repos/overrides.go b/cmd/workspace/repos/overrides.go index e3b58857..44d9dca4 100644 --- a/cmd/workspace/repos/overrides.go +++ b/cmd/workspace/repos/overrides.go @@ -1,9 +1,162 @@ package repos -import "github.com/databricks/cli/libs/cmdio" +import ( + "context" + "fmt" + "strconv" + + "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/databricks-sdk-go" + "github.com/databricks/databricks-sdk-go/service/workspace" + "github.com/spf13/cobra" +) func init() { listCmd.Annotations["template"] = cmdio.Heredoc(` {{range .}}{{green "%d" .Id}} {{.Path}} {{.Branch|blue}} {{.Url|cyan}} {{end}}`) + + createCmd.Use = "create URL [PROVIDER]" + createCmd.Args = func(cmd *cobra.Command, args []string) error { + // If the provider argument is not specified, we try to detect it from the URL. 
+ check := cobra.RangeArgs(1, 2) + if cmd.Flags().Changed("json") { + check = cobra.ExactArgs(0) + } + return check(cmd, args) + } + createCmd.RunE = func(cmd *cobra.Command, args []string) (err error) { + ctx := cmd.Context() + w := root.WorkspaceClient(ctx) + if cmd.Flags().Changed("json") { + err = createJson.Unmarshal(&createReq) + if err != nil { + return err + } + } else { + createReq.Url = args[0] + if len(args) > 1 { + createReq.Provider = args[1] + } else { + createReq.Provider = DetectProvider(createReq.Url) + if createReq.Provider == "" { + return fmt.Errorf( + "could not detect provider from URL %q; please specify", createReq.Url) + } + } + } + response, err := w.Repos.Create(ctx, createReq) + if err != nil { + return err + } + return cmdio.Render(ctx, response) + } + + deleteCmd.Use = "delete REPO_ID_OR_PATH" + deleteCmd.RunE = func(cmd *cobra.Command, args []string) (err error) { + ctx := cmd.Context() + w := root.WorkspaceClient(ctx) + if cmd.Flags().Changed("json") { + err = deleteJson.Unmarshal(&deleteReq) + if err != nil { + return err + } + } else { + deleteReq.RepoId, err = repoArgumentToRepoID(ctx, w, args) + if err != nil { + return err + } + } + err = w.Repos.Delete(ctx, deleteReq) + if err != nil { + return err + } + return nil + } + + getCmd.Use = "get REPO_ID_OR_PATH" + getCmd.RunE = func(cmd *cobra.Command, args []string) (err error) { + ctx := cmd.Context() + w := root.WorkspaceClient(ctx) + if cmd.Flags().Changed("json") { + err = getJson.Unmarshal(&getReq) + if err != nil { + return err + } + } else { + getReq.RepoId, err = repoArgumentToRepoID(ctx, w, args) + if err != nil { + return err + } + } + + response, err := w.Repos.Get(ctx, getReq) + if err != nil { + return err + } + return cmdio.Render(ctx, response) + } + + updateCmd.Use = "update REPO_ID_OR_PATH" + updateCmd.RunE = func(cmd *cobra.Command, args []string) (err error) { + ctx := cmd.Context() + w := root.WorkspaceClient(ctx) + if cmd.Flags().Changed("json") { + err = 
updateJson.Unmarshal(&updateReq) + if err != nil { + return err + } + } else { + updateReq.RepoId, err = repoArgumentToRepoID(ctx, w, args) + if err != nil { + return err + } + } + + err = w.Repos.Update(ctx, updateReq) + if err != nil { + return err + } + return nil + } +} + +func repoArgumentToRepoID(ctx context.Context, w *databricks.WorkspaceClient, args []string) (int64, error) { + // ---- Begin copy from cmd/workspace/repos/repos.go ---- + if len(args) == 0 { + promptSpinner := cmdio.Spinner(ctx) + promptSpinner <- "No REPO_ID argument specified. Loading names for Repos drop-down." + names, err := w.Repos.RepoInfoPathToIdMap(ctx, workspace.ListReposRequest{}) + close(promptSpinner) + if err != nil { + return 0, fmt.Errorf("failed to load names for Repos drop-down. Please manually specify required arguments. Original error: %w", err) + } + id, err := cmdio.Select(ctx, names, "The ID for the corresponding repo to access") + if err != nil { + return 0, err + } + args = append(args, id) + } + if len(args) != 1 { + return 0, fmt.Errorf("expected to have the id for the corresponding repo to access") + } + // ---- End copy from cmd/workspace/repos/repos.go ---- + + // If the argument is a repo ID, return it. + arg := args[0] + id, err := strconv.ParseInt(arg, 10, 64) + if err == nil { + return id, nil + } + + // If the argument cannot be parsed as a repo ID, try to look it up by name. 
+ oi, err := w.Workspace.GetStatusByPath(ctx, arg) + if err != nil { + return 0, fmt.Errorf("failed to look up repo by path: %w", err) + } + if oi.ObjectType != workspace.ObjectTypeRepo { + return 0, fmt.Errorf("object at path %q is not a repo", arg) + } + return oi.ObjectId, nil } diff --git a/cmd/workspace/repos/provider.go b/cmd/workspace/repos/provider.go new file mode 100644 index 00000000..9e407e2b --- /dev/null +++ b/cmd/workspace/repos/provider.go @@ -0,0 +1,30 @@ +package repos + +import ( + "net/url" + "regexp" + "strings" +) + +var gitProviders = map[string]string{ + "github.com": "gitHub", + "dev.azure.com": "azureDevOpsServices", + "gitlab.com": "gitLab", + "bitbucket.org": "bitbucketCloud", +} + +var awsCodeCommitRegexp = regexp.MustCompile(`^git-codecommit\.[^.]+\.amazonaws.com$`) + +func DetectProvider(rawURL string) string { + provider := "" + u, err := url.Parse(rawURL) + if err != nil { + return provider + } + if v, ok := gitProviders[strings.ToLower(u.Host)]; ok { + provider = v + } else if awsCodeCommitRegexp.MatchString(u.Host) { + provider = "awsCodeCommit" + } + return provider +} diff --git a/cmd/workspace/repos/provider_test.go b/cmd/workspace/repos/provider_test.go new file mode 100644 index 00000000..e719be2b --- /dev/null +++ b/cmd/workspace/repos/provider_test.go @@ -0,0 +1,21 @@ +package repos + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestDetectProvider(t *testing.T) { + for url, provider := range map[string]string{ + "https://user@bitbucket.org/user/repo.git": "bitbucketCloud", + "https://github.com//user/repo.git": "gitHub", + "https://user@dev.azure.com/user/project/_git/repo": "azureDevOpsServices", + "https://abc/user/repo.git": "", + "ewfgwergfwe": "", + "https://foo@@bar": "", + "https://git-codecommit.us-east-2.amazonaws.com/v1/repos/MyDemoRepo": "awsCodeCommit", + } { + assert.Equal(t, provider, DetectProvider(url)) + } +} diff --git a/cmd/workspace/workspace/overrides.go 
b/cmd/workspace/workspace/overrides.go index 0a00ba25..e1b97c59 100644 --- a/cmd/workspace/workspace/overrides.go +++ b/cmd/workspace/workspace/overrides.go @@ -8,4 +8,7 @@ func init() { {{header "ID"}} {{header "Type"}} {{header "Language"}} {{header "Path"}} {{range .}}{{green "%d" .ObjectId}} {{blue "%s" .ObjectType}} {{cyan "%s" .Language}} {{.Path|cyan}} {{end}}`) + + // The export command prints the contents of the file to stdout by default. + exportCmd.Annotations["template"] = `{{.Content | b64_decode}}` } diff --git a/internal/fs_cp_test.go b/internal/fs_cp_test.go index c9171086..766d6a59 100644 --- a/internal/fs_cp_test.go +++ b/internal/fs_cp_test.go @@ -66,7 +66,7 @@ func setupLocalFiler(t *testing.T) (filer.Filer, string) { f, err := filer.NewLocalClient(tmp) require.NoError(t, err) - return f, path.Join("file:/", filepath.ToSlash(tmp)) + return f, path.Join(filepath.ToSlash(tmp)) } func setupDbfsFiler(t *testing.T) (filer.Filer, string) { @@ -259,21 +259,14 @@ func TestAccFsCpErrorsWhenSourceIsDirWithoutRecursiveFlag(t *testing.T) { tmpDir := temporaryDbfsDir(t, w) _, _, err = RequireErrorRun(t, "fs", "cp", "dbfs:"+tmpDir, "dbfs:/tmp") - assert.Equal(t, fmt.Sprintf("source path %s is a directory. Please specify the --recursive flag", strings.TrimPrefix(tmpDir, "/")), err.Error()) -} - -func TestAccFsCpErrorsOnNoScheme(t *testing.T) { - t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) - - _, _, err := RequireErrorRun(t, "fs", "cp", "/a", "/b") - assert.Equal(t, "no scheme specified for path /a. Please specify scheme \"dbfs\" or \"file\". Example: file:/foo/bar or file:/c:/foo/bar", err.Error()) + assert.Equal(t, fmt.Sprintf("source path %s is a directory. Please specify the --recursive flag", tmpDir), err.Error()) } func TestAccFsCpErrorsOnInvalidScheme(t *testing.T) { t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) _, _, err := RequireErrorRun(t, "fs", "cp", "dbfs:/a", "https:/b") - assert.Equal(t, "unsupported scheme https specified for path https:/b. 
Please specify scheme \"dbfs\" or \"file\". Example: file:/foo/bar or file:/c:/foo/bar", err.Error()) + assert.Equal(t, "invalid scheme: https", err.Error()) } func TestAccFsCpSourceIsDirectoryButTargetIsFile(t *testing.T) { diff --git a/internal/fs_ls_test.go b/internal/fs_ls_test.go index 060c706d..885fc31f 100644 --- a/internal/fs_ls_test.go +++ b/internal/fs_ls_test.go @@ -42,6 +42,7 @@ func TestFsLsForDbfs(t *testing.T) { require.NoError(t, err) // assert on ls output + assert.Len(t, parsedStdout, 2) assert.Equal(t, "a", parsedStdout[0]["name"]) assert.Equal(t, true, parsedStdout[0]["is_directory"]) assert.Equal(t, float64(0), parsedStdout[0]["size"]) @@ -50,6 +51,42 @@ func TestFsLsForDbfs(t *testing.T) { assert.Equal(t, float64(3), parsedStdout[1]["size"]) } +func TestFsLsForDbfsWithAbsolutePaths(t *testing.T) { + t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) + + ctx := context.Background() + w, err := databricks.NewWorkspaceClient() + require.NoError(t, err) + + tmpDir := temporaryDbfsDir(t, w) + + f, err := filer.NewDbfsClient(w, tmpDir) + require.NoError(t, err) + + err = f.Mkdir(ctx, "a") + require.NoError(t, err) + err = f.Write(ctx, "a/hello.txt", strings.NewReader("abc"), filer.CreateParentDirectories) + require.NoError(t, err) + err = f.Write(ctx, "bye.txt", strings.NewReader("def")) + require.NoError(t, err) + + stdout, stderr := RequireSuccessfulRun(t, "fs", "ls", "dbfs:"+tmpDir, "--output=json", "--absolute") + assert.Equal(t, "", stderr.String()) + var parsedStdout []map[string]any + err = json.Unmarshal(stdout.Bytes(), &parsedStdout) + require.NoError(t, err) + + // assert on ls output + assert.Len(t, parsedStdout, 2) + assert.Equal(t, path.Join("dbfs:", tmpDir, "a"), parsedStdout[0]["name"]) + assert.Equal(t, true, parsedStdout[0]["is_directory"]) + assert.Equal(t, float64(0), parsedStdout[0]["size"]) + + assert.Equal(t, path.Join("dbfs:", tmpDir, "bye.txt"), parsedStdout[1]["name"]) + assert.Equal(t, false, parsedStdout[1]["is_directory"]) + 
assert.Equal(t, float64(3), parsedStdout[1]["size"]) +} + func TestFsLsForDbfsOnFile(t *testing.T) { t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) diff --git a/internal/repos_test.go b/internal/repos_test.go new file mode 100644 index 00000000..340de334 --- /dev/null +++ b/internal/repos_test.go @@ -0,0 +1,166 @@ +package internal + +import ( + "context" + "fmt" + "strconv" + "testing" + + "github.com/databricks/databricks-sdk-go" + "github.com/databricks/databricks-sdk-go/apierr" + "github.com/databricks/databricks-sdk-go/service/workspace" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func synthesizeTemporaryRepoPath(t *testing.T, w *databricks.WorkspaceClient, ctx context.Context) string { + me, err := w.CurrentUser.Me(ctx) + require.NoError(t, err) + repoPath := fmt.Sprintf("/Repos/%s/%s", me.UserName, RandomName("empty-repo-integration-")) + + // Cleanup if repo was created at specified path. + t.Cleanup(func() { + oi, err := w.Workspace.GetStatusByPath(ctx, repoPath) + if apierr.IsMissing(err) { + return + } + require.NoError(t, err) + err = w.Repos.DeleteByRepoId(ctx, oi.ObjectId) + require.NoError(t, err) + }) + + return repoPath +} + +func createTemporaryRepo(t *testing.T, w *databricks.WorkspaceClient, ctx context.Context) (int64, string) { + repoPath := synthesizeTemporaryRepoPath(t, w, ctx) + repoInfo, err := w.Repos.Create(ctx, workspace.CreateRepo{ + Path: repoPath, + Url: repoUrl, + Provider: "gitHub", + }) + require.NoError(t, err) + return repoInfo.Id, repoPath +} + +func TestReposCreateWithProvider(t *testing.T) { + t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) + + ctx := context.Background() + w, err := databricks.NewWorkspaceClient() + require.NoError(t, err) + repoPath := synthesizeTemporaryRepoPath(t, w, ctx) + + _, stderr := RequireSuccessfulRun(t, "repos", "create", repoUrl, "gitHub", "--path", repoPath) + assert.Equal(t, "", stderr.String()) + + // Confirm the repo was created. 
+ oi, err := w.Workspace.GetStatusByPath(ctx, repoPath) + assert.NoError(t, err) + assert.Equal(t, workspace.ObjectTypeRepo, oi.ObjectType) +} + +func TestReposCreateWithoutProvider(t *testing.T) { + t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) + + ctx := context.Background() + w, err := databricks.NewWorkspaceClient() + require.NoError(t, err) + repoPath := synthesizeTemporaryRepoPath(t, w, ctx) + + _, stderr := RequireSuccessfulRun(t, "repos", "create", repoUrl, "--path", repoPath) + assert.Equal(t, "", stderr.String()) + + // Confirm the repo was created. + oi, err := w.Workspace.GetStatusByPath(ctx, repoPath) + assert.NoError(t, err) + assert.Equal(t, workspace.ObjectTypeRepo, oi.ObjectType) +} + +func TestReposGet(t *testing.T) { + t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) + + ctx := context.Background() + w, err := databricks.NewWorkspaceClient() + require.NoError(t, err) + + repoId, repoPath := createTemporaryRepo(t, w, ctx) + + // Get by ID + byIdOutput, stderr := RequireSuccessfulRun(t, "repos", "get", strconv.FormatInt(repoId, 10), "--output=json") + assert.Equal(t, "", stderr.String()) + + // Get by path + byPathOutput, stderr := RequireSuccessfulRun(t, "repos", "get", repoPath, "--output=json") + assert.Equal(t, "", stderr.String()) + + // Output should be the same + assert.Equal(t, byIdOutput.String(), byPathOutput.String()) + + // Get by path fails + _, stderr, err = RequireErrorRun(t, "repos", "get", repoPath+"-doesntexist", "--output=json") + assert.ErrorContains(t, err, "failed to look up repo") + + // Get by path resolves to something other than a repo + _, stderr, err = RequireErrorRun(t, "repos", "get", "/Repos", "--output=json") + assert.ErrorContains(t, err, "is not a repo") +} + +func TestReposUpdate(t *testing.T) { + t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) + + ctx := context.Background() + w, err := databricks.NewWorkspaceClient() + require.NoError(t, err) + + repoId, repoPath := createTemporaryRepo(t, w, ctx) + + // Update by ID + byIdOutput, 
stderr := RequireSuccessfulRun(t, "repos", "update", strconv.FormatInt(repoId, 10), "--branch", "ide") + assert.Equal(t, "", stderr.String()) + + // Update by path + byPathOutput, stderr := RequireSuccessfulRun(t, "repos", "update", repoPath, "--branch", "ide") + assert.Equal(t, "", stderr.String()) + + // Output should be the same + assert.Equal(t, byIdOutput.String(), byPathOutput.String()) +} + +func TestReposDeleteByID(t *testing.T) { + t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) + + ctx := context.Background() + w, err := databricks.NewWorkspaceClient() + require.NoError(t, err) + + repoId, _ := createTemporaryRepo(t, w, ctx) + + // Delete by ID + stdout, stderr := RequireSuccessfulRun(t, "repos", "delete", strconv.FormatInt(repoId, 10)) + assert.Equal(t, "", stdout.String()) + assert.Equal(t, "", stderr.String()) + + // Check it was actually deleted + _, err = w.Repos.GetByRepoId(ctx, repoId) + assert.True(t, apierr.IsMissing(err), err) +} + +func TestReposDeleteByPath(t *testing.T) { + t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) + + ctx := context.Background() + w, err := databricks.NewWorkspaceClient() + require.NoError(t, err) + + repoId, repoPath := createTemporaryRepo(t, w, ctx) + + // Delete by path + stdout, stderr := RequireSuccessfulRun(t, "repos", "delete", repoPath) + assert.Equal(t, "", stdout.String()) + assert.Equal(t, "", stderr.String()) + + // Check it was actually deleted + _, err = w.Repos.GetByRepoId(ctx, repoId) + assert.True(t, apierr.IsMissing(err), err) +} diff --git a/internal/workspace_test.go b/internal/workspace_test.go index 3aafffe4..dd26bcf4 100644 --- a/internal/workspace_test.go +++ b/internal/workspace_test.go @@ -6,6 +6,7 @@ import ( "io" "net/http" "os" + "path" "path/filepath" "strings" "testing" @@ -39,6 +40,26 @@ func TestWorkpaceGetStatusErrorWhenNoArguments(t *testing.T) { assert.Equal(t, "accepts 1 arg(s), received 0", err.Error()) } +func TestWorkpaceExportPrintsContents(t *testing.T) { + t.Log(GetEnvOrSkipTest(t, 
"CLOUD_ENV")) + + ctx := context.Background() + w := databricks.Must(databricks.NewWorkspaceClient()) + tmpdir := temporaryWorkspaceDir(t, w) + f, err := filer.NewWorkspaceFilesClient(w, tmpdir) + require.NoError(t, err) + + // Write file to workspace + contents := "#!/usr/bin/bash\necho hello, world\n" + err = f.Write(ctx, "file-a", strings.NewReader(contents)) + require.NoError(t, err) + + // Run export + stdout, stderr := RequireSuccessfulRun(t, "workspace", "export", path.Join(tmpdir, "file-a")) + assert.Equal(t, contents, stdout.String()) + assert.Equal(t, "", stderr.String()) +} + func setupWorkspaceImportExportTest(t *testing.T) (context.Context, filer.Filer, string) { t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) diff --git a/libs/cmdio/render.go b/libs/cmdio/render.go index 7b3da190..d641f61d 100644 --- a/libs/cmdio/render.go +++ b/libs/cmdio/render.go @@ -1,6 +1,8 @@ package cmdio import ( + "bytes" + "encoding/base64" "encoding/json" "io" "strings" @@ -93,6 +95,27 @@ func renderTemplate(w io.Writer, tmpl string, v any) error { "pretty_date": func(t time.Time) string { return t.Format("2006-01-02T15:04:05Z") }, + "b64_encode": func(in string) (string, error) { + var out bytes.Buffer + enc := base64.NewEncoder(base64.StdEncoding, &out) + _, err := enc.Write([]byte(in)) + if err != nil { + return "", err + } + err = enc.Close() + if err != nil { + return "", err + } + return out.String(), nil + }, + "b64_decode": func(in string) (string, error) { + dec := base64.NewDecoder(base64.StdEncoding, strings.NewReader(in)) + out, err := io.ReadAll(dec) + if err != nil { + return "", err + } + return string(out), nil + }, }).Parse(tmpl) if err != nil { return err diff --git a/libs/filer/dbfs_client.go b/libs/filer/dbfs_client.go index 85f87ff5..64eb4b77 100644 --- a/libs/filer/dbfs_client.go +++ b/libs/filer/dbfs_client.go @@ -8,6 +8,7 @@ import ( "net/http" "path" "sort" + "strings" "time" "github.com/databricks/databricks-sdk-go" @@ -67,14 +68,14 @@ type DbfsClient 
struct { workspaceClient *databricks.WorkspaceClient // File operations will be relative to this path. - root RootPath + root WorkspaceRootPath } func NewDbfsClient(w *databricks.WorkspaceClient, root string) (Filer, error) { return &DbfsClient{ workspaceClient: w, - root: NewRootPath(root), + root: NewWorkspaceRootPath(root), }, nil } @@ -145,22 +146,25 @@ func (w *DbfsClient) Read(ctx context.Context, name string) (io.ReadCloser, erro return nil, err } - // This stat call serves two purposes: - // 1. Checks file at path exists, and throws an error if it does not - // 2. Allows us to error out if the path is a directory. This is needed - // because the Dbfs.Open method on the SDK does not error when the path is - // a directory - // TODO(added 8 June 2023): remove this stat call on go sdk bump. https://github.com/databricks/cli/issues/450 - stat, err := w.Stat(ctx, name) - if err != nil { - return nil, err - } - if stat.IsDir() { - return nil, NotAFile{absPath} - } - handle, err := w.workspaceClient.Dbfs.Open(ctx, absPath, files.FileModeRead) if err != nil { + // Return error if file is a directory + if strings.Contains(err.Error(), "cannot open directory for reading") { + return nil, NotAFile{absPath} + } + + var aerr *apierr.APIError + if !errors.As(err, &aerr) { + return nil, err + } + + // This API returns a 404 if the file doesn't exist. + if aerr.StatusCode == http.StatusNotFound { + if aerr.ErrorCode == "RESOURCE_DOES_NOT_EXIST" { + return nil, FileDoesNotExistError{absPath} + } + } + return nil, err } diff --git a/libs/filer/files_client.go b/libs/filer/files_client.go index 6c1f5a97..ee7587dc 100644 --- a/libs/filer/files_client.go +++ b/libs/filer/files_client.go @@ -60,7 +60,7 @@ type FilesClient struct { apiClient *client.DatabricksClient // File operations will be relative to this path. 
- root RootPath + root WorkspaceRootPath } func filesNotImplementedError(fn string) error { @@ -77,7 +77,7 @@ func NewFilesClient(w *databricks.WorkspaceClient, root string) (Filer, error) { workspaceClient: w, apiClient: apiClient, - root: NewRootPath(root), + root: NewWorkspaceRootPath(root), }, nil } diff --git a/libs/filer/local_client.go b/libs/filer/local_client.go index 8df59d25..8d960c84 100644 --- a/libs/filer/local_client.go +++ b/libs/filer/local_client.go @@ -13,12 +13,12 @@ import ( // LocalClient implements the [Filer] interface for the local filesystem. type LocalClient struct { // File operations will be relative to this path. - root RootPath + root localRootPath } func NewLocalClient(root string) (Filer, error) { return &LocalClient{ - root: NewRootPath(root), + root: NewLocalRootPath(root), }, nil } diff --git a/libs/filer/local_root_path.go b/libs/filer/local_root_path.go new file mode 100644 index 00000000..15a54263 --- /dev/null +++ b/libs/filer/local_root_path.go @@ -0,0 +1,27 @@ +package filer + +import ( + "fmt" + "path/filepath" + "strings" +) + +type localRootPath struct { + rootPath string +} + +func NewLocalRootPath(root string) localRootPath { + if root == "" { + return localRootPath{""} + } + return localRootPath{filepath.Clean(root)} +} + +func (rp *localRootPath) Join(name string) (string, error) { + absPath := filepath.Join(rp.rootPath, name) + + if !strings.HasPrefix(absPath, rp.rootPath) { + return "", fmt.Errorf("relative path escapes root: %s", name) + } + return absPath, nil +} diff --git a/libs/filer/local_root_path_test.go b/libs/filer/local_root_path_test.go new file mode 100644 index 00000000..1a39c446 --- /dev/null +++ b/libs/filer/local_root_path_test.go @@ -0,0 +1,142 @@ +package filer + +import ( + "path/filepath" + "runtime" + "testing" + + "github.com/stretchr/testify/assert" +) + +func testUnixLocalRootPath(t *testing.T, uncleanRoot string) { + cleanRoot := filepath.Clean(uncleanRoot) + rp := 
NewLocalRootPath(uncleanRoot) + + remotePath, err := rp.Join("a/b/c") + assert.NoError(t, err) + assert.Equal(t, cleanRoot+"/a/b/c", remotePath) + + remotePath, err = rp.Join("a/b/../d") + assert.NoError(t, err) + assert.Equal(t, cleanRoot+"/a/d", remotePath) + + remotePath, err = rp.Join("a/../c") + assert.NoError(t, err) + assert.Equal(t, cleanRoot+"/c", remotePath) + + remotePath, err = rp.Join("a/b/c/.") + assert.NoError(t, err) + assert.Equal(t, cleanRoot+"/a/b/c", remotePath) + + remotePath, err = rp.Join("a/b/c/d/./../../f/g") + assert.NoError(t, err) + assert.Equal(t, cleanRoot+"/a/b/f/g", remotePath) + + remotePath, err = rp.Join(".//a/..//./b/..") + assert.NoError(t, err) + assert.Equal(t, cleanRoot, remotePath) + + remotePath, err = rp.Join("a/b/../..") + assert.NoError(t, err) + assert.Equal(t, cleanRoot, remotePath) + + remotePath, err = rp.Join("") + assert.NoError(t, err) + assert.Equal(t, cleanRoot, remotePath) + + remotePath, err = rp.Join(".") + assert.NoError(t, err) + assert.Equal(t, cleanRoot, remotePath) + + remotePath, err = rp.Join("/") + assert.NoError(t, err) + assert.Equal(t, cleanRoot, remotePath) + + _, err = rp.Join("..") + assert.ErrorContains(t, err, `relative path escapes root: ..`) + + _, err = rp.Join("a/../..") + assert.ErrorContains(t, err, `relative path escapes root: a/../..`) + + _, err = rp.Join("./../.") + assert.ErrorContains(t, err, `relative path escapes root: ./../.`) + + _, err = rp.Join("/./.././..") + assert.ErrorContains(t, err, `relative path escapes root: /./.././..`) + + _, err = rp.Join("./../.") + assert.ErrorContains(t, err, `relative path escapes root: ./../.`) + + _, err = rp.Join("./..") + assert.ErrorContains(t, err, `relative path escapes root: ./..`) + + _, err = rp.Join("./../../..") + assert.ErrorContains(t, err, `relative path escapes root: ./../../..`) + + _, err = rp.Join("./../a/./b../../..") + assert.ErrorContains(t, err, `relative path escapes root: ./../a/./b../../..`) + + _, err = 
rp.Join("../..") + assert.ErrorContains(t, err, `relative path escapes root: ../..`) +} + +func TestUnixLocalRootPath(t *testing.T) { + if runtime.GOOS != "darwin" && runtime.GOOS != "linux" { + t.SkipNow() + } + + testUnixLocalRootPath(t, "/some/root/path") + testUnixLocalRootPath(t, "/some/root/path/") + testUnixLocalRootPath(t, "/some/root/path/.") + testUnixLocalRootPath(t, "/some/root/../path/") +} + +func testWindowsLocalRootPath(t *testing.T, uncleanRoot string) { + cleanRoot := filepath.Clean(uncleanRoot) + rp := NewLocalRootPath(uncleanRoot) + + remotePath, err := rp.Join(`a\b\c`) + assert.NoError(t, err) + assert.Equal(t, cleanRoot+`\a\b\c`, remotePath) + + remotePath, err = rp.Join(`a\b\..\d`) + assert.NoError(t, err) + assert.Equal(t, cleanRoot+`\a\d`, remotePath) + + remotePath, err = rp.Join(`a\..\c`) + assert.NoError(t, err) + assert.Equal(t, cleanRoot+`\c`, remotePath) + + remotePath, err = rp.Join(`a\b\c\.`) + assert.NoError(t, err) + assert.Equal(t, cleanRoot+`\a\b\c`, remotePath) + + remotePath, err = rp.Join(`a\b\..\..`) + assert.NoError(t, err) + assert.Equal(t, cleanRoot, remotePath) + + remotePath, err = rp.Join("") + assert.NoError(t, err) + assert.Equal(t, cleanRoot, remotePath) + + remotePath, err = rp.Join(".") + assert.NoError(t, err) + assert.Equal(t, cleanRoot, remotePath) + + _, err = rp.Join("..") + assert.ErrorContains(t, err, `relative path escapes root`) + + _, err = rp.Join(`a\..\..`) + assert.ErrorContains(t, err, `relative path escapes root`) +} + +func TestWindowsLocalRootPath(t *testing.T) { + if runtime.GOOS != "windows" { + t.SkipNow() + } + + testWindowsLocalRootPath(t, `c:\some\root\path`) + testWindowsLocalRootPath(t, `c:\some\root\path\`) + testWindowsLocalRootPath(t, `c:\some\root\path\.`) + testWindowsLocalRootPath(t, `C:\some\root\..\path\`) +} diff --git a/libs/filer/workspace_files_client.go b/libs/filer/workspace_files_client.go index 2b5e718b..db06f91c 100644 --- a/libs/filer/workspace_files_client.go +++ 
b/libs/filer/workspace_files_client.go @@ -3,7 +3,6 @@ package filer import ( "bytes" "context" - "encoding/base64" "errors" "fmt" "io" @@ -79,7 +78,7 @@ type WorkspaceFilesClient struct { apiClient *client.DatabricksClient // File operations will be relative to this path. - root RootPath + root WorkspaceRootPath } func NewWorkspaceFilesClient(w *databricks.WorkspaceClient, root string) (Filer, error) { @@ -92,7 +91,7 @@ func NewWorkspaceFilesClient(w *databricks.WorkspaceClient, root string) (Filer, workspaceClient: w, apiClient: apiClient, - root: NewRootPath(root), + root: NewWorkspaceRootPath(root), }, nil } @@ -187,18 +186,7 @@ func (w *WorkspaceFilesClient) Read(ctx context.Context, name string) (io.ReadCl // Export file contents. Note the /workspace/export API has a limit of 10MBs // for the file size - // TODO: use direct download once it's fixed. see: https://github.com/databricks/cli/issues/452 - res, err := w.workspaceClient.Workspace.Export(ctx, workspace.ExportRequest{ - Path: absPath, - }) - if err != nil { - return nil, err - } - b, err := base64.StdEncoding.DecodeString(res.Content) - if err != nil { - return nil, err - } - return io.NopCloser(bytes.NewReader(b)), nil + return w.workspaceClient.Workspace.Download(ctx, absPath) } func (w *WorkspaceFilesClient) Delete(ctx context.Context, name string, mode ...DeleteMode) error { diff --git a/libs/filer/root_path.go b/libs/filer/workspace_root_path.go similarity index 65% rename from libs/filer/root_path.go rename to libs/filer/workspace_root_path.go index bdeff5d7..d5163924 100644 --- a/libs/filer/root_path.go +++ b/libs/filer/workspace_root_path.go @@ -6,23 +6,23 @@ import ( "strings" ) -// RootPath can be joined with a relative path and ensures that +// WorkspaceRootPath can be joined with a relative path and ensures that // the returned path is always a strict child of the root path. 
-type RootPath struct { +type WorkspaceRootPath struct { rootPath string } -// NewRootPath constructs and returns [RootPath]. +// NewWorkspaceRootPath constructs and returns [WorkspaceRootPath]. // The named path is cleaned on construction. -func NewRootPath(name string) RootPath { -	return RootPath{ +func NewWorkspaceRootPath(name string) WorkspaceRootPath { +	return WorkspaceRootPath{ rootPath: path.Clean(name), } } // Join returns the specified path name joined to the root. // It returns an error if the resulting path is not a strict child of the root path. -func (p *RootPath) Join(name string) (string, error) { +func (p *WorkspaceRootPath) Join(name string) (string, error) { absPath := path.Join(p.rootPath, name) // Don't allow escaping the specified root using relative paths. diff --git a/libs/filer/root_path_test.go b/libs/filer/workspace_root_path_test.go similarity index 98% rename from libs/filer/root_path_test.go rename to libs/filer/workspace_root_path_test.go index 965842d0..73746d27 100644 --- a/libs/filer/root_path_test.go +++ b/libs/filer/workspace_root_path_test.go @@ -9,7 +9,7 @@ import ( func testRootPath(t *testing.T, uncleanRoot string) { cleanRoot := path.Clean(uncleanRoot) - rp := NewRootPath(uncleanRoot) + rp := NewWorkspaceRootPath(uncleanRoot) remotePath, err := rp.Join("a/b/c") assert.NoError(t, err)