From ffdbec87cc5976587e73fd836a7fa8d1eaf6c6c3 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Mon, 21 Oct 2024 13:45:39 +0200 Subject: [PATCH 01/14] Added support for pip options in environment dependencies (#1842) ## Changes Added support for specifying pip options such as `--extra-index-url` and etc. in environments dependencies ``` environments: - environment_key: Default spec: client: "1" dependencies: - --extra-index-url https://foo@bar.com/packages/smth somepackage - json==1.0.0 ``` ## Tests Added regression test --------- Co-authored-by: Pieter Noordhuis --- bundle/config/mutator/translate_paths_test.go | 6 ++++++ bundle/libraries/local_path.go | 11 +++++++++++ 2 files changed, 17 insertions(+) diff --git a/bundle/config/mutator/translate_paths_test.go b/bundle/config/mutator/translate_paths_test.go index c03cee73e..9d655b27b 100644 --- a/bundle/config/mutator/translate_paths_test.go +++ b/bundle/config/mutator/translate_paths_test.go @@ -699,6 +699,9 @@ func TestTranslatePathJobEnvironments(t *testing.T) { "../dist/env2.whl", "simplejson", "/Workspace/Users/foo@bar.com/test.whl", + "--extra-index-url https://name:token@gitlab.com/api/v4/projects/9876/packages/pypi/simple foobar", + "foobar --extra-index-url https://name:token@gitlab.com/api/v4/projects/9876/packages/pypi/simple", + "https://foo@bar.com/packages/pypi/simple", }, }, }, @@ -719,6 +722,9 @@ func TestTranslatePathJobEnvironments(t *testing.T) { assert.Equal(t, strings.Join([]string{".", "dist", "env2.whl"}, string(os.PathSeparator)), b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[1]) assert.Equal(t, "simplejson", b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[2]) assert.Equal(t, "/Workspace/Users/foo@bar.com/test.whl", b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[3]) + assert.Equal(t, "--extra-index-url https://name:token@gitlab.com/api/v4/projects/9876/packages/pypi/simple foobar", b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[4]) + assert.Equal(t, "foobar --extra-index-url https://name:token@gitlab.com/api/v4/projects/9876/packages/pypi/simple", b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[5]) + assert.Equal(t, "https://foo@bar.com/packages/pypi/simple", b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[6]) } func TestTranslatePathWithComplexVariables(t *testing.T) { diff --git a/bundle/libraries/local_path.go b/bundle/libraries/local_path.go index 6d60d56bc..53b714104 100644 --- a/bundle/libraries/local_path.go +++ b/bundle/libraries/local_path.go @@ -57,6 +57,12 @@ func IsLibraryLocal(dep string) bool { } } + // If the dependency starts with --, it's a pip flag option which is a valid + // entry for environment dependencies but not a local path + if containsPipFlag(dep) { + return false + } + // If the dependency is a requirements file, it's not a valid local path if strings.HasPrefix(dep, "-r") { return false @@ -70,6 +76,11 @@ func IsLibraryLocal(dep string) bool { return IsLocalPath(dep) } +func containsPipFlag(input string) bool { + re := regexp.MustCompile(`--[a-zA-Z0-9-]+`) + return re.MatchString(input) +} + // ^[a-zA-Z0-9\-_]+: Matches the package name, allowing alphanumeric characters, dashes (-), and underscores (_). // \[.*\])?: Optionally matches any extras specified in square brackets, e.g., [security]. // ((==|!=|<=|>=|~=|>|<)\d+(\.\d+){0,2}(\.\*)?): Optionally matches version specifiers, supporting various operators (==, !=, etc.) followed by a version number (e.g., 2.25.1). From ca45e53f42c5c4b26f2833554ab7118802c017cb Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Mon, 21 Oct 2024 19:56:17 +0200 Subject: [PATCH 02/14] Add script to make testing of code on branches easier (#1844) ## Changes Convenience script to exec into a shell where a CLI build for a specific branch is made available. ## Tests Manually from `/tmp` with `bash <([path]) demo-dashboards`. --- internal/bugbash/README.md | 13 ++++ internal/bugbash/exec.sh | 139 +++++++++++++++++++++++++++++++++++++ 2 files changed, 152 insertions(+) create mode 100644 internal/bugbash/README.md create mode 100755 internal/bugbash/exec.sh diff --git a/internal/bugbash/README.md b/internal/bugbash/README.md new file mode 100644 index 000000000..941ab6227 --- /dev/null +++ b/internal/bugbash/README.md @@ -0,0 +1,13 @@ +# Bugbash + +The script in this directory can be used to conveniently exec into a shell +where a CLI build for a specific branch is made available. + +## Usage + +This script prompts if you do NOT have at least Bash 5 installed, +but works without command completion with earlier versions. + +```shell +bash <(curl -fsSL https://raw.githubusercontent.com/databricks/cli/main/internal/bugbash/exec.sh) my-branch +``` diff --git a/internal/bugbash/exec.sh b/internal/bugbash/exec.sh new file mode 100755 index 000000000..ac25b16ed --- /dev/null +++ b/internal/bugbash/exec.sh @@ -0,0 +1,139 @@ +#!/usr/bin/env bash + +set -euo pipefail + +# Set the GitHub repository for the Databricks CLI. +export GH_REPO="databricks/cli" + +# Synthesize the directory name for the snapshot build. +function cli_snapshot_directory() { + dir="cli" + + # Append OS + case "$(uname -s)" in + Linux) + dir="${dir}_linux" + ;; + Darwin) + dir="${dir}_darwin" + ;; + *) + echo "Unknown operating system: $os" + ;; + esac + + # Append architecture + case "$(uname -m)" in + x86_64) + dir="${dir}_amd64_v1" + ;; + i386|i686) + dir="${dir}_386" + ;; + arm64|aarch64) + dir="${dir}_arm64" + ;; + armv7l|armv8l) + dir="${dir}_arm_6" + ;; + *) + echo "Unknown architecture: $arch" + ;; + esac + + echo $dir +} + +BRANCH=$1 +shift + +# Default to main branch if branch is not specified. +if [ -z "$BRANCH" ]; then + BRANCH=main +fi + +if [ -z "$BRANCH" ]; then + echo "Please specify which branch to bugbash..." + exit 1 +fi + +# Check if the "gh" command is available. +if ! command -v gh &> /dev/null; then + echo "The GitHub CLI (gh) is required to download the snapshot build." + echo "Install and configure it with:" + echo "" + echo " brew install gh" + echo " gh auth login" + echo "" + exit 1 +fi + +echo "Looking for a snapshot build of the Databricks CLI on branch $BRANCH..." + +# Find last successful build on $BRANCH. +last_successful_run_id=$( + gh run list -b "$BRANCH" -w release-snapshot --json 'databaseId,conclusion' | + jq 'limit(1; .[] | select(.conclusion == "success")) | .databaseId' +) +if [ -z "$last_successful_run_id" ]; then + echo "Unable to find last successful build of the release-snapshot workflow for branch $BRANCH." + exit 1 +fi + +# Determine artifact name with the right binaries for this runner. +case "$(uname -s)" in +Linux) + artifact="cli_linux_snapshot" + ;; +Darwin) + artifact="cli_darwin_snapshot" + ;; +esac + +# Create a temporary directory to download the artifact. +dir=$(mktemp -d) + +# Download the artifact. +echo "Downloading the snapshot build..." +gh run download "$last_successful_run_id" -n "$artifact" -D "$dir/.bin" +dir="$dir/.bin/$(cli_snapshot_directory)" +if [ ! -d "$dir" ]; then + echo "Directory does not exist: $dir" + exit 1 +fi + +# Make CLI available on $PATH. +chmod +x "$dir/databricks" +export PATH="$dir:$PATH" + +# Set the prompt to indicate the bugbash environment and exec. +export PS1="(bugbash $BRANCH) \[\033[01;32m\]\u@\h\[\033[00m\]:\[\033[01;34m\]\w\[\033[00m\]\$ " + +# Display completion instructions. +echo "" +echo "==================================================================" + +if [[ ${BASH_VERSINFO[0]} -lt 5 ]]; then + echo -en "\033[31m" + echo "You have Bash version < 5 installed... completion won't work." + echo -en "\033[0m" + echo "" + echo "Install it with:" + echo "" + echo " brew install bash bash-completion" + echo "" + echo "==================================================================" +fi + +echo "" +echo "To load completions in your current shell session:" +echo "" +echo " source /opt/homebrew/etc/profile.d/bash_completion.sh" +echo " source <(databricks completion bash)" +echo "" +echo "==================================================================" +echo "" + +# Exec into a new shell. +# Note: don't use zsh because on macOS it _always_ overwrites PS1. +exec /usr/bin/env bash From 571076d5e1a637e4a6d4f2873665f1e4a0cb4c7f Mon Sep 17 00:00:00 2001 From: Stephen Macke Date: Mon, 21 Oct 2024 11:27:07 -0700 Subject: [PATCH 03/14] Support Git worktrees for `sync` (#1831) ## Changes This change allows the `sync` command to work from [git worktrees](https://git-scm.com/docs/git-worktree). ## Tests * Added unit tests for traversal of worktree related files. * Manually confirmed that synchronization of files from a main checkout, as well as a worktree, observed the same ignore rules (both locally defined as well as from `$GIT_DIR/info/exclude`). --------- Co-authored-by: Pieter Noordhuis --- libs/git/repository.go | 64 ++++++++++++-------- libs/git/view.go | 2 +- libs/git/worktree.go | 123 ++++++++++++++++++++++++++++++++++++++ libs/git/worktree_test.go | 108 +++++++++++++++++++++++++++++++++ 4 files changed, 271 insertions(+), 26 deletions(-) create mode 100644 libs/git/worktree.go create mode 100644 libs/git/worktree_test.go diff --git a/libs/git/repository.go b/libs/git/repository.go index 82ee7987b..0bbd57865 100644 --- a/libs/git/repository.go +++ b/libs/git/repository.go @@ -23,8 +23,21 @@ type Repository struct { // directory where we process .gitignore files. real bool - // root is the absolute path to the repository root. - root vfs.Path + // rootDir is the path to the root of the repository checkout. + // This can be either the main repository checkout or a worktree checkout. + // For more information about worktrees, see: https://git-scm.com/docs/git-worktree#_description. + rootDir vfs.Path + + // gitDir is the equivalent of $GIT_DIR and points to the + // `.git` directory of a repository or a worktree directory. + // See https://git-scm.com/docs/git-worktree#_details for more information. + gitDir vfs.Path + + // gitCommonDir is the equivalent of $GIT_COMMON_DIR and points to the + // `.git` directory of the main working tree (common between worktrees). + // This is equivalent to [gitDir] if this is the main working tree. + // See https://git-scm.com/docs/git-worktree#_details for more information. + gitCommonDir vfs.Path // ignore contains a list of ignore patterns indexed by the // path prefix relative to the repository root. @@ -44,12 +57,11 @@ type Repository struct { // Root returns the absolute path to the repository root. func (r *Repository) Root() string { - return r.root.Native() + return r.rootDir.Native() } func (r *Repository) CurrentBranch() (string, error) { - // load .git/HEAD - ref, err := LoadReferenceFile(r.root, path.Join(GitDirectoryName, "HEAD")) + ref, err := LoadReferenceFile(r.gitDir, "HEAD") if err != nil { return "", err } @@ -65,8 +77,7 @@ func (r *Repository) CurrentBranch() (string, error) { } func (r *Repository) LatestCommit() (string, error) { - // load .git/HEAD - ref, err := LoadReferenceFile(r.root, path.Join(GitDirectoryName, "HEAD")) + ref, err := LoadReferenceFile(r.gitDir, "HEAD") if err != nil { return "", err } @@ -80,12 +91,12 @@ func (r *Repository) LatestCommit() (string, error) { return ref.Content, nil } - // read reference from .git/HEAD + // Read reference from $GIT_DIR/HEAD branchHeadPath, err := ref.ResolvePath() if err != nil { return "", err } - branchHeadRef, err := LoadReferenceFile(r.root, path.Join(GitDirectoryName, branchHeadPath)) + branchHeadRef, err := LoadReferenceFile(r.gitCommonDir, branchHeadPath) if err != nil { return "", err } @@ -125,7 +136,7 @@ func (r *Repository) loadConfig() error { if err != nil { return fmt.Errorf("unable to load user specific gitconfig: %w", err) } - err = config.loadFile(r.root, ".git/config") + err = config.loadFile(r.gitCommonDir, "config") if err != nil { return fmt.Errorf("unable to load repository specific gitconfig: %w", err) } @@ -133,12 +144,6 @@ func (r *Repository) loadConfig() error { return nil } -// newIgnoreFile constructs a new [ignoreRules] implementation backed by -// a file using the specified path relative to the repository root. -func (r *Repository) newIgnoreFile(relativeIgnoreFilePath string) ignoreRules { - return newIgnoreFile(r.root, relativeIgnoreFilePath) -} - // getIgnoreRules returns a slice of [ignoreRules] that apply // for the specified prefix. The prefix must be cleaned by the caller. // It lazily initializes an entry for the specified prefix if it @@ -149,7 +154,7 @@ func (r *Repository) getIgnoreRules(prefix string) []ignoreRules { return fs } - r.ignore[prefix] = append(r.ignore[prefix], r.newIgnoreFile(path.Join(prefix, gitIgnoreFileName))) + r.ignore[prefix] = append(r.ignore[prefix], newIgnoreFile(r.rootDir, path.Join(prefix, gitIgnoreFileName))) return r.ignore[prefix] } @@ -205,21 +210,30 @@ func (r *Repository) Ignore(relPath string) (bool, error) { func NewRepository(path vfs.Path) (*Repository, error) { real := true - rootPath, err := vfs.FindLeafInTree(path, GitDirectoryName) + rootDir, err := vfs.FindLeafInTree(path, GitDirectoryName) if err != nil { if !errors.Is(err, fs.ErrNotExist) { return nil, err } // Cannot find `.git` directory. - // Treat the specified path as a potential repository root. + // Treat the specified path as a potential repository root checkout. real = false - rootPath = path + rootDir = path + } + + // Derive $GIT_DIR and $GIT_COMMON_DIR paths if this is a real repository. + // If it isn't a real repository, they'll point to the (non-existent) `.git` directory. + gitDir, gitCommonDir, err := resolveGitDirs(rootDir) + if err != nil { + return nil, err } repo := &Repository{ - real: real, - root: rootPath, - ignore: make(map[string][]ignoreRules), + real: real, + rootDir: rootDir, + gitDir: gitDir, + gitCommonDir: gitCommonDir, + ignore: make(map[string][]ignoreRules), } err = repo.loadConfig() @@ -253,9 +267,9 @@ func NewRepository(path vfs.Path) (*Repository, error) { ".git", }), // Load repository-wide exclude file. - repo.newIgnoreFile(".git/info/exclude"), + newIgnoreFile(repo.gitCommonDir, "info/exclude"), // Load root gitignore file. - repo.newIgnoreFile(".gitignore"), + newIgnoreFile(repo.rootDir, ".gitignore"), } return repo, nil diff --git a/libs/git/view.go b/libs/git/view.go index 90eed0bb8..2d2e39a60 100644 --- a/libs/git/view.go +++ b/libs/git/view.go @@ -80,7 +80,7 @@ func NewView(root vfs.Path) (*View, error) { // Target path must be relative to the repository root path. target := root.Native() - prefix := repo.root.Native() + prefix := repo.rootDir.Native() if !strings.HasPrefix(target, prefix) { return nil, fmt.Errorf("path %q is not within repository root %q", root.Native(), prefix) } diff --git a/libs/git/worktree.go b/libs/git/worktree.go new file mode 100644 index 000000000..964c1c95b --- /dev/null +++ b/libs/git/worktree.go @@ -0,0 +1,123 @@ +package git + +import ( + "bufio" + "errors" + "fmt" + "io/fs" + "path/filepath" + "strings" + + "github.com/databricks/cli/libs/vfs" +) + +func readLines(root vfs.Path, name string) ([]string, error) { + file, err := root.Open(name) + if err != nil { + return nil, err + } + + defer file.Close() + + var lines []string + scanner := bufio.NewScanner(file) + for scanner.Scan() { + lines = append(lines, scanner.Text()) + } + + return lines, scanner.Err() +} + +// readGitDir reads the value of the `.git` file in a worktree. +func readGitDir(root vfs.Path) (string, error) { + lines, err := readLines(root, GitDirectoryName) + if err != nil { + return "", err + } + + var gitDir string + for _, line := range lines { + parts := strings.SplitN(line, ": ", 2) + if len(parts) != 2 { + continue + } + + if parts[0] == "gitdir" { + gitDir = strings.TrimSpace(parts[1]) + } + } + + if gitDir == "" { + return "", fmt.Errorf(`expected %q to contain a line with "gitdir: [...]"`, filepath.Join(root.Native(), GitDirectoryName)) + } + + return gitDir, nil +} + +// readGitCommonDir reads the value of the `commondir` file in the `.git` directory of a worktree. +// This file typically contains "../.." to point to $GIT_COMMON_DIR. +func readGitCommonDir(gitDir vfs.Path) (string, error) { + lines, err := readLines(gitDir, "commondir") + if err != nil { + return "", err + } + + if len(lines) == 0 { + return "", errors.New("file is empty") + } + + return strings.TrimSpace(lines[0]), nil +} + +// resolveGitDirs resolves the paths for $GIT_DIR and $GIT_COMMON_DIR. +// The path argument is the root of the checkout where (supposedly) a `.git` file or directory exists. +func resolveGitDirs(root vfs.Path) (vfs.Path, vfs.Path, error) { + fileInfo, err := root.Stat(GitDirectoryName) + if err != nil { + // If the `.git` file or directory does not exist, then this is not a git repository. + // Return paths that we know don't exist, so we do not need to perform nil checks in the caller. + if errors.Is(err, fs.ErrNotExist) { + gitDir := vfs.MustNew(filepath.Join(root.Native(), GitDirectoryName)) + return gitDir, gitDir, nil + } + return nil, nil, err + } + + // If the path is a directory, then it is the main working tree. + // Both $GIT_DIR and $GIT_COMMON_DIR point to the same directory. + if fileInfo.IsDir() { + gitDir := vfs.MustNew(filepath.Join(root.Native(), GitDirectoryName)) + return gitDir, gitDir, nil + } + + // If the path is not a directory, then it is a worktree. + // Read value for $GIT_DIR. + gitDirValue, err := readGitDir(root) + if err != nil { + return nil, nil, err + } + + // Resolve $GIT_DIR. + var gitDir vfs.Path + if filepath.IsAbs(gitDirValue) { + gitDir = vfs.MustNew(gitDirValue) + } else { + gitDir = vfs.MustNew(filepath.Join(root.Native(), gitDirValue)) + } + + // Read value for $GIT_COMMON_DIR. + gitCommonDirValue, err := readGitCommonDir(gitDir) + if err != nil { + return nil, nil, fmt.Errorf(`expected "commondir" file in worktree git folder at %q: %w`, gitDir.Native(), err) + } + + // Resolve $GIT_COMMON_DIR. + var gitCommonDir vfs.Path + if filepath.IsAbs(gitCommonDirValue) { + gitCommonDir = vfs.MustNew(gitCommonDirValue) + } else { + gitCommonDir = vfs.MustNew(filepath.Join(gitDir.Native(), gitCommonDirValue)) + } + + return gitDir, gitCommonDir, nil +} diff --git a/libs/git/worktree_test.go b/libs/git/worktree_test.go new file mode 100644 index 000000000..3d620c483 --- /dev/null +++ b/libs/git/worktree_test.go @@ -0,0 +1,108 @@ +package git + +import ( + "fmt" + "os" + "path/filepath" + "testing" + + "github.com/databricks/cli/libs/vfs" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func setupWorktree(t *testing.T) string { + var err error + + tmpDir := t.TempDir() + + // Checkout path + err = os.MkdirAll(filepath.Join(tmpDir, "my_worktree"), os.ModePerm) + require.NoError(t, err) + + // Main $GIT_COMMON_DIR + err = os.MkdirAll(filepath.Join(tmpDir, ".git"), os.ModePerm) + require.NoError(t, err) + + // Worktree $GIT_DIR + err = os.MkdirAll(filepath.Join(tmpDir, ".git/worktrees/my_worktree"), os.ModePerm) + require.NoError(t, err) + + return tmpDir +} + +func writeGitDir(t *testing.T, dir, content string) { + err := os.WriteFile(filepath.Join(dir, "my_worktree/.git"), []byte(content), os.ModePerm) + require.NoError(t, err) +} + +func writeGitCommonDir(t *testing.T, dir, content string) { + err := os.WriteFile(filepath.Join(dir, ".git/worktrees/my_worktree/commondir"), []byte(content), os.ModePerm) + require.NoError(t, err) +} + +func verifyCorrectDirs(t *testing.T, dir string) { + gitDir, gitCommonDir, err := resolveGitDirs(vfs.MustNew(filepath.Join(dir, "my_worktree"))) + require.NoError(t, err) + assert.Equal(t, filepath.Join(dir, ".git/worktrees/my_worktree"), gitDir.Native()) + assert.Equal(t, filepath.Join(dir, ".git"), gitCommonDir.Native()) +} + +func TestWorktreeResolveGitDir(t *testing.T) { + dir := setupWorktree(t) + writeGitCommonDir(t, dir, "../..") + + t.Run("relative", func(t *testing.T) { + writeGitDir(t, dir, fmt.Sprintf("gitdir: %s", "../.git/worktrees/my_worktree")) + verifyCorrectDirs(t, dir) + }) + + t.Run("absolute", func(t *testing.T) { + writeGitDir(t, dir, fmt.Sprintf("gitdir: %s", filepath.Join(dir, ".git/worktrees/my_worktree"))) + verifyCorrectDirs(t, dir) + }) + + t.Run("additional spaces", func(t *testing.T) { + writeGitDir(t, dir, fmt.Sprintf("gitdir: %s \n\n\n", "../.git/worktrees/my_worktree")) + verifyCorrectDirs(t, dir) + }) + + t.Run("empty", func(t *testing.T) { + writeGitDir(t, dir, "") + + _, _, err := resolveGitDirs(vfs.MustNew(filepath.Join(dir, "my_worktree"))) + assert.ErrorContains(t, err, ` to contain a line with "gitdir: [...]"`) + }) +} + +func TestWorktreeResolveCommonDir(t *testing.T) { + dir := setupWorktree(t) + writeGitDir(t, dir, fmt.Sprintf("gitdir: %s", "../.git/worktrees/my_worktree")) + + t.Run("relative", func(t *testing.T) { + writeGitCommonDir(t, dir, "../..") + verifyCorrectDirs(t, dir) + }) + + t.Run("absolute", func(t *testing.T) { + writeGitCommonDir(t, dir, filepath.Join(dir, ".git")) + verifyCorrectDirs(t, dir) + }) + + t.Run("additional spaces", func(t *testing.T) { + writeGitCommonDir(t, dir, " ../.. \n\n\n") + verifyCorrectDirs(t, dir) + }) + + t.Run("empty", func(t *testing.T) { + writeGitCommonDir(t, dir, "") + + _, _, err := resolveGitDirs(vfs.MustNew(filepath.Join(dir, "my_worktree"))) + assert.ErrorContains(t, err, `expected "commondir" file in worktree git folder at `) + }) + + t.Run("missing", func(t *testing.T) { + _, _, err := resolveGitDirs(vfs.MustNew(filepath.Join(dir, "my_worktree"))) + assert.ErrorContains(t, err, `expected "commondir" file in worktree git folder at `) + }) +} From f8bb3a8d729b4cbe6313c1e313379dbe70f6b680 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 22 Oct 2024 11:37:01 +0200 Subject: [PATCH 04/14] Bump github.com/databricks/databricks-sdk-go from 0.48.0 to 0.49.0 (#1843) Bumps [github.com/databricks/databricks-sdk-go](https://github.com/databricks/databricks-sdk-go) from 0.48.0 to 0.49.0.
Release notes

Sourced from github.com/databricks/databricks-sdk-go's releases.

v0.49.0

API Changes:

OpenAPI SHA: cf9c61453990df0f9453670f2fe68e1b128647a2, Date: 2024-10-14

Changelog

Sourced from github.com/databricks/databricks-sdk-go's changelog.

[Release] Release v0.49.0

API Changes:

OpenAPI SHA: cf9c61453990df0f9453670f2fe68e1b128647a2, Date: 2024-10-14

Commits

Most Recent Ignore Conditions Applied to This Pull Request | Dependency Name | Ignore Conditions | | --- | --- | | github.com/databricks/databricks-sdk-go | [>= 0.28.a, < 0.29] |
[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=github.com/databricks/databricks-sdk-go&package-manager=go_modules&previous-version=0.48.0&new-version=0.49.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
--------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Andrew Nester --- .codegen/_openapi_sha | 2 +- .gitattributes | 1 + bundle/schema/jsonschema.json | 48 +++- cmd/workspace/apps/apps.go | 3 - .../disable-legacy-dbfs.go | 220 ++++++++++++++++++ cmd/workspace/jobs/jobs.go | 1 + cmd/workspace/settings/settings.go | 2 + go.mod | 2 +- go.sum | 4 +- 9 files changed, 275 insertions(+), 8 deletions(-) create mode 100755 cmd/workspace/disable-legacy-dbfs/disable-legacy-dbfs.go diff --git a/.codegen/_openapi_sha b/.codegen/_openapi_sha index 303c78553..2d9cb6d86 100644 --- a/.codegen/_openapi_sha +++ b/.codegen/_openapi_sha @@ -1 +1 @@ -0c86ea6dbd9a730c24ff0d4e509603e476955ac5 \ No newline at end of file +cf9c61453990df0f9453670f2fe68e1b128647a2 \ No newline at end of file diff --git a/.gitattributes b/.gitattributes index 2470eb33d..ae10198bb 100755 --- a/.gitattributes +++ b/.gitattributes @@ -54,6 +54,7 @@ cmd/workspace/dashboards/dashboards.go linguist-generated=true cmd/workspace/data-sources/data-sources.go linguist-generated=true cmd/workspace/default-namespace/default-namespace.go linguist-generated=true cmd/workspace/disable-legacy-access/disable-legacy-access.go linguist-generated=true +cmd/workspace/disable-legacy-dbfs/disable-legacy-dbfs.go linguist-generated=true cmd/workspace/enhanced-security-monitoring/enhanced-security-monitoring.go linguist-generated=true cmd/workspace/experiments/experiments.go linguist-generated=true cmd/workspace/external-locations/external-locations.go linguist-generated=true diff --git a/bundle/schema/jsonschema.json b/bundle/schema/jsonschema.json index 06b9cc15a..178656fe0 100644 --- a/bundle/schema/jsonschema.json +++ b/bundle/schema/jsonschema.json @@ -209,6 +209,10 @@ { "type": "object", "properties": { + "budget_policy_id": { + "description": "The id of the user specified budget policy to use for this job.\nIf not specified, a default budget policy may be applied when creating or modifying the job.\nSee `effective_budget_policy_id` for the budget policy used by this workload.", + "$ref": "#/$defs/string" + }, "continuous": { "description": "An optional continuous property for this job. The continuous property will ensure that there is always one run executing. Only one of `schedule` and `continuous` can be used.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.Continuous" @@ -3901,6 +3905,10 @@ { "type": "object", "properties": { + "report": { + "description": "Select tables from a specific source report.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.ReportSpec" + }, "schema": { "description": "Select tables from a specific source schema.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.SchemaSpec" @@ -4233,6 +4241,40 @@ } ] }, + "pipelines.ReportSpec": { + "anyOf": [ + { + "type": "object", + "properties": { + "destination_catalog": { + "description": "Required. Destination catalog to store table.", + "$ref": "#/$defs/string" + }, + "destination_schema": { + "description": "Required. Destination schema to store table.", + "$ref": "#/$defs/string" + }, + "destination_table": { + "description": "Required. Destination table name. The pipeline fails if a table with that name already exists.", + "$ref": "#/$defs/string" + }, + "source_url": { + "description": "Required. Report URL in the source system.", + "$ref": "#/$defs/string" + }, + "table_configuration": { + "description": "Configuration settings to control the ingestion of tables. These settings override the table_configuration defined in the IngestionPipelineDefinition object.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.TableSpecificConfig" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, "pipelines.SchemaSpec": { "anyOf": [ { @@ -4281,7 +4323,7 @@ "$ref": "#/$defs/string" }, "destination_table": { - "description": "Optional. Destination table name. The pipeline fails If a table with that name already exists. If not set, the source table name is used.", + "description": "Optional. Destination table name. The pipeline fails if a table with that name already exists. If not set, the source table name is used.", "$ref": "#/$defs/string" }, "source_catalog": { @@ -4329,6 +4371,10 @@ "SCD_TYPE_1", "SCD_TYPE_2" ] + }, + "sequence_by": { + "description": "The column names specifying the logical order of events in the source data. Delta Live Tables uses this sequencing to handle change events that arrive out of order.", + "$ref": "#/$defs/slice/string" } }, "additionalProperties": false diff --git a/cmd/workspace/apps/apps.go b/cmd/workspace/apps/apps.go index 4cee2f82a..9331ddc2e 100755 --- a/cmd/workspace/apps/apps.go +++ b/cmd/workspace/apps/apps.go @@ -28,9 +28,6 @@ func New() *cobra.Command { Annotations: map[string]string{ "package": "apps", }, - - // This service is being previewed; hide from help output. - Hidden: true, } // Add methods diff --git a/cmd/workspace/disable-legacy-dbfs/disable-legacy-dbfs.go b/cmd/workspace/disable-legacy-dbfs/disable-legacy-dbfs.go new file mode 100755 index 000000000..d09755370 --- /dev/null +++ b/cmd/workspace/disable-legacy-dbfs/disable-legacy-dbfs.go @@ -0,0 +1,220 @@ +// Code generated from OpenAPI specs by Databricks SDK Generator. DO NOT EDIT. + +package disable_legacy_dbfs + +import ( + "fmt" + + "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/flags" + "github.com/databricks/databricks-sdk-go/service/settings" + "github.com/spf13/cobra" +) + +// Slice with functions to override default command behavior. +// Functions can be added from the `init()` function in manually curated files in this directory. +var cmdOverrides []func(*cobra.Command) + +func New() *cobra.Command { + cmd := &cobra.Command{ + Use: "disable-legacy-dbfs", + Short: `When this setting is on, access to DBFS root and DBFS mounts is disallowed (as well as creation of new mounts).`, + Long: `When this setting is on, access to DBFS root and DBFS mounts is disallowed (as + well as creation of new mounts). When the setting is off, all DBFS + functionality is enabled`, + + // This service is being previewed; hide from help output. + Hidden: true, + } + + // Add methods + cmd.AddCommand(newDelete()) + cmd.AddCommand(newGet()) + cmd.AddCommand(newUpdate()) + + // Apply optional overrides to this command. + for _, fn := range cmdOverrides { + fn(cmd) + } + + return cmd +} + +// start delete command + +// Slice with functions to override default command behavior. +// Functions can be added from the `init()` function in manually curated files in this directory. +var deleteOverrides []func( + *cobra.Command, + *settings.DeleteDisableLegacyDbfsRequest, +) + +func newDelete() *cobra.Command { + cmd := &cobra.Command{} + + var deleteReq settings.DeleteDisableLegacyDbfsRequest + + // TODO: short flags + + cmd.Flags().StringVar(&deleteReq.Etag, "etag", deleteReq.Etag, `etag used for versioning.`) + + cmd.Use = "delete" + cmd.Short = `Delete the disable legacy DBFS setting.` + cmd.Long = `Delete the disable legacy DBFS setting. + + Deletes the disable legacy DBFS setting for a workspace, reverting back to the + default.` + + cmd.Annotations = make(map[string]string) + + cmd.Args = func(cmd *cobra.Command, args []string) error { + check := root.ExactArgs(0) + return check(cmd, args) + } + + cmd.PreRunE = root.MustWorkspaceClient + cmd.RunE = func(cmd *cobra.Command, args []string) (err error) { + ctx := cmd.Context() + w := root.WorkspaceClient(ctx) + + response, err := w.Settings.DisableLegacyDbfs().Delete(ctx, deleteReq) + if err != nil { + return err + } + return cmdio.Render(ctx, response) + } + + // Disable completions since they are not applicable. + // Can be overridden by manual implementation in `override.go`. + cmd.ValidArgsFunction = cobra.NoFileCompletions + + // Apply optional overrides to this command. + for _, fn := range deleteOverrides { + fn(cmd, &deleteReq) + } + + return cmd +} + +// start get command + +// Slice with functions to override default command behavior. +// Functions can be added from the `init()` function in manually curated files in this directory. +var getOverrides []func( + *cobra.Command, + *settings.GetDisableLegacyDbfsRequest, +) + +func newGet() *cobra.Command { + cmd := &cobra.Command{} + + var getReq settings.GetDisableLegacyDbfsRequest + + // TODO: short flags + + cmd.Flags().StringVar(&getReq.Etag, "etag", getReq.Etag, `etag used for versioning.`) + + cmd.Use = "get" + cmd.Short = `Get the disable legacy DBFS setting.` + cmd.Long = `Get the disable legacy DBFS setting. + + Gets the disable legacy DBFS setting.` + + cmd.Annotations = make(map[string]string) + + cmd.Args = func(cmd *cobra.Command, args []string) error { + check := root.ExactArgs(0) + return check(cmd, args) + } + + cmd.PreRunE = root.MustWorkspaceClient + cmd.RunE = func(cmd *cobra.Command, args []string) (err error) { + ctx := cmd.Context() + w := root.WorkspaceClient(ctx) + + response, err := w.Settings.DisableLegacyDbfs().Get(ctx, getReq) + if err != nil { + return err + } + return cmdio.Render(ctx, response) + } + + // Disable completions since they are not applicable. + // Can be overridden by manual implementation in `override.go`. + cmd.ValidArgsFunction = cobra.NoFileCompletions + + // Apply optional overrides to this command. + for _, fn := range getOverrides { + fn(cmd, &getReq) + } + + return cmd +} + +// start update command + +// Slice with functions to override default command behavior. +// Functions can be added from the `init()` function in manually curated files in this directory. +var updateOverrides []func( + *cobra.Command, + *settings.UpdateDisableLegacyDbfsRequest, +) + +func newUpdate() *cobra.Command { + cmd := &cobra.Command{} + + var updateReq settings.UpdateDisableLegacyDbfsRequest + var updateJson flags.JsonFlag + + // TODO: short flags + cmd.Flags().Var(&updateJson, "json", `either inline JSON string or @path/to/file.json with request body`) + + cmd.Use = "update" + cmd.Short = `Update the disable legacy DBFS setting.` + cmd.Long = `Update the disable legacy DBFS setting. + + Updates the disable legacy DBFS setting for the workspace.` + + cmd.Annotations = make(map[string]string) + + cmd.PreRunE = root.MustWorkspaceClient + cmd.RunE = func(cmd *cobra.Command, args []string) (err error) { + ctx := cmd.Context() + w := root.WorkspaceClient(ctx) + + if cmd.Flags().Changed("json") { + diags := updateJson.Unmarshal(&updateReq) + if diags.HasError() { + return diags.Error() + } + if len(diags) > 0 { + err := cmdio.RenderDiagnosticsToErrorOut(ctx, diags) + if err != nil { + return err + } + } + } else { + return fmt.Errorf("please provide command input in JSON format by specifying the --json flag") + } + + response, err := w.Settings.DisableLegacyDbfs().Update(ctx, updateReq) + if err != nil { + return err + } + return cmdio.Render(ctx, response) + } + + // Disable completions since they are not applicable. + // Can be overridden by manual implementation in `override.go`. + cmd.ValidArgsFunction = cobra.NoFileCompletions + + // Apply optional overrides to this command. + for _, fn := range updateOverrides { + fn(cmd, &updateReq) + } + + return cmd +} + +// end service DisableLegacyDbfs diff --git a/cmd/workspace/jobs/jobs.go b/cmd/workspace/jobs/jobs.go index d4ceb0c28..9e8db43d0 100755 --- a/cmd/workspace/jobs/jobs.go +++ b/cmd/workspace/jobs/jobs.go @@ -1557,6 +1557,7 @@ func newSubmit() *cobra.Command { cmd.Flags().Var(&submitJson, "json", `either inline JSON string or @path/to/file.json with request body`) // TODO: array: access_control_list + cmd.Flags().StringVar(&submitReq.BudgetPolicyId, "budget-policy-id", submitReq.BudgetPolicyId, `The user specified id of the budget policy to use for this one-time run.`) // TODO: complex arg: email_notifications // TODO: array: environments // TODO: complex arg: git_source diff --git a/cmd/workspace/settings/settings.go b/cmd/workspace/settings/settings.go index aaeecf41b..31e6ceee4 100755 --- a/cmd/workspace/settings/settings.go +++ b/cmd/workspace/settings/settings.go @@ -9,6 +9,7 @@ import ( compliance_security_profile "github.com/databricks/cli/cmd/workspace/compliance-security-profile" default_namespace "github.com/databricks/cli/cmd/workspace/default-namespace" disable_legacy_access "github.com/databricks/cli/cmd/workspace/disable-legacy-access" + disable_legacy_dbfs "github.com/databricks/cli/cmd/workspace/disable-legacy-dbfs" enhanced_security_monitoring "github.com/databricks/cli/cmd/workspace/enhanced-security-monitoring" restrict_workspace_admins "github.com/databricks/cli/cmd/workspace/restrict-workspace-admins" ) @@ -33,6 +34,7 @@ func New() *cobra.Command { cmd.AddCommand(compliance_security_profile.New()) cmd.AddCommand(default_namespace.New()) cmd.AddCommand(disable_legacy_access.New()) + cmd.AddCommand(disable_legacy_dbfs.New()) cmd.AddCommand(enhanced_security_monitoring.New()) cmd.AddCommand(restrict_workspace_admins.New()) diff --git a/go.mod b/go.mod index 697205f33..9059b9637 100644 --- a/go.mod +++ b/go.mod @@ -7,7 +7,7 @@ toolchain go1.22.7 require ( github.com/Masterminds/semver/v3 v3.3.0 // MIT github.com/briandowns/spinner v1.23.1 // Apache 2.0 - github.com/databricks/databricks-sdk-go v0.48.0 // Apache 2.0 + github.com/databricks/databricks-sdk-go v0.49.0 // Apache 2.0 github.com/fatih/color v1.17.0 // MIT github.com/ghodss/yaml v1.0.0 // MIT + NOTICE github.com/google/uuid v1.6.0 // BSD-3-Clause diff --git a/go.sum b/go.sum index 03698b20a..f365fcbf6 100644 --- a/go.sum +++ b/go.sum @@ -32,8 +32,8 @@ github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGX github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/cyphar/filepath-securejoin v0.2.4 h1:Ugdm7cg7i6ZK6x3xDF1oEu1nfkyfH53EtKeQYTC3kyg= github.com/cyphar/filepath-securejoin v0.2.4/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4= -github.com/databricks/databricks-sdk-go v0.48.0 h1:46KtsnRo+FGhC3izUXbpL0PXBNomvsdignYDhJZlm9s= -github.com/databricks/databricks-sdk-go v0.48.0/go.mod h1:ds+zbv5mlQG7nFEU5ojLtgN/u0/9YzZmKQES/CfedzU= +github.com/databricks/databricks-sdk-go v0.49.0 h1:VBTeZZMLIuBSM4kxOCfUcW9z4FUQZY2QeNRD5qm9FUQ= +github.com/databricks/databricks-sdk-go v0.49.0/go.mod h1:ds+zbv5mlQG7nFEU5ojLtgN/u0/9YzZmKQES/CfedzU= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= From 68d69d6e0bb420cdfbdceceb686717912187980e Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Tue, 22 Oct 2024 12:43:43 +0200 Subject: [PATCH 05/14] Upgrade TF provider to 1.54.0 (#1852) ## Changes Upgrade TF provider to 1.54.0 --- bundle/internal/tf/codegen/schema/version.go | 2 +- .../data_source_notification_destinations.go | 15 +++++++++ .../tf/schema/data_source_registered_model.go | 32 +++++++++++++++++++ bundle/internal/tf/schema/data_sources.go | 4 +++ bundle/internal/tf/schema/resource_job.go | 1 + .../tf/schema/resource_online_table.go | 11 ++++--- .../internal/tf/schema/resource_pipeline.go | 19 +++++++++++ bundle/internal/tf/schema/root.go | 2 +- 8 files changed, 79 insertions(+), 7 deletions(-) create mode 100644 bundle/internal/tf/schema/data_source_notification_destinations.go create mode 100644 bundle/internal/tf/schema/data_source_registered_model.go diff --git a/bundle/internal/tf/codegen/schema/version.go b/bundle/internal/tf/codegen/schema/version.go index 49e48a6e3..0c4244089 100644 --- a/bundle/internal/tf/codegen/schema/version.go +++ b/bundle/internal/tf/codegen/schema/version.go @@ -1,3 +1,3 @@ package schema -const ProviderVersion = "1.53.0" +const ProviderVersion = "1.54.0" diff --git a/bundle/internal/tf/schema/data_source_notification_destinations.go b/bundle/internal/tf/schema/data_source_notification_destinations.go new file mode 100644 index 000000000..c95ad6db9 --- /dev/null +++ b/bundle/internal/tf/schema/data_source_notification_destinations.go @@ -0,0 +1,15 @@ +// Generated from Databricks Terraform provider schema. DO NOT EDIT. + +package schema + +type DataSourceNotificationDestinationsNotificationDestinations struct { + DestinationType string `json:"destination_type,omitempty"` + DisplayName string `json:"display_name,omitempty"` + Id string `json:"id,omitempty"` +} + +type DataSourceNotificationDestinations struct { + DisplayNameContains string `json:"display_name_contains,omitempty"` + Type string `json:"type,omitempty"` + NotificationDestinations []DataSourceNotificationDestinationsNotificationDestinations `json:"notification_destinations,omitempty"` +} diff --git a/bundle/internal/tf/schema/data_source_registered_model.go b/bundle/internal/tf/schema/data_source_registered_model.go new file mode 100644 index 000000000..e19e0849a --- /dev/null +++ b/bundle/internal/tf/schema/data_source_registered_model.go @@ -0,0 +1,32 @@ +// Generated from Databricks Terraform provider schema. DO NOT EDIT. + +package schema + +type DataSourceRegisteredModelModelInfoAliases struct { + AliasName string `json:"alias_name,omitempty"` + VersionNum int `json:"version_num,omitempty"` +} + +type DataSourceRegisteredModelModelInfo struct { + BrowseOnly bool `json:"browse_only,omitempty"` + CatalogName string `json:"catalog_name,omitempty"` + Comment string `json:"comment,omitempty"` + CreatedAt int `json:"created_at,omitempty"` + CreatedBy string `json:"created_by,omitempty"` + FullName string `json:"full_name,omitempty"` + MetastoreId string `json:"metastore_id,omitempty"` + Name string `json:"name,omitempty"` + Owner string `json:"owner,omitempty"` + SchemaName string `json:"schema_name,omitempty"` + StorageLocation string `json:"storage_location,omitempty"` + UpdatedAt int `json:"updated_at,omitempty"` + UpdatedBy string `json:"updated_by,omitempty"` + Aliases []DataSourceRegisteredModelModelInfoAliases `json:"aliases,omitempty"` +} + +type DataSourceRegisteredModel struct { + FullName string `json:"full_name"` + IncludeAliases bool `json:"include_aliases,omitempty"` + IncludeBrowse bool `json:"include_browse,omitempty"` + ModelInfo []DataSourceRegisteredModelModelInfo `json:"model_info,omitempty"` +} diff --git a/bundle/internal/tf/schema/data_sources.go b/bundle/internal/tf/schema/data_sources.go index 10829b994..050e0bc1d 100644 --- a/bundle/internal/tf/schema/data_sources.go +++ b/bundle/internal/tf/schema/data_sources.go @@ -36,7 +36,9 @@ type DataSources struct { NodeType map[string]any `json:"databricks_node_type,omitempty"` Notebook map[string]any `json:"databricks_notebook,omitempty"` NotebookPaths map[string]any `json:"databricks_notebook_paths,omitempty"` + NotificationDestinations map[string]any `json:"databricks_notification_destinations,omitempty"` Pipelines map[string]any `json:"databricks_pipelines,omitempty"` + RegisteredModel map[string]any `json:"databricks_registered_model,omitempty"` Schema map[string]any `json:"databricks_schema,omitempty"` Schemas map[string]any `json:"databricks_schemas,omitempty"` ServicePrincipal map[string]any `json:"databricks_service_principal,omitempty"` @@ -92,7 +94,9 @@ func NewDataSources() *DataSources { NodeType: make(map[string]any), Notebook: make(map[string]any), NotebookPaths: make(map[string]any), + NotificationDestinations: make(map[string]any), Pipelines: make(map[string]any), + RegisteredModel: make(map[string]any), Schema: make(map[string]any), Schemas: make(map[string]any), ServicePrincipal: make(map[string]any), diff --git a/bundle/internal/tf/schema/resource_job.go b/bundle/internal/tf/schema/resource_job.go index 42b648b0f..c89eafab9 100644 --- a/bundle/internal/tf/schema/resource_job.go +++ b/bundle/internal/tf/schema/resource_job.go @@ -1448,6 +1448,7 @@ type ResourceJobWebhookNotifications struct { type ResourceJob struct { AlwaysRunning bool `json:"always_running,omitempty"` + BudgetPolicyId string `json:"budget_policy_id,omitempty"` ControlRunState bool `json:"control_run_state,omitempty"` Description string `json:"description,omitempty"` EditMode string `json:"edit_mode,omitempty"` diff --git a/bundle/internal/tf/schema/resource_online_table.go b/bundle/internal/tf/schema/resource_online_table.go index de671eade..58d6f4ba5 100644 --- a/bundle/internal/tf/schema/resource_online_table.go +++ b/bundle/internal/tf/schema/resource_online_table.go @@ -19,9 +19,10 @@ type ResourceOnlineTableSpec struct { } type ResourceOnlineTable struct { - Id string `json:"id,omitempty"` - Name string `json:"name"` - Status []any `json:"status,omitempty"` - TableServingUrl string `json:"table_serving_url,omitempty"` - Spec *ResourceOnlineTableSpec `json:"spec,omitempty"` + Id string `json:"id,omitempty"` + Name string `json:"name"` + Status []any `json:"status,omitempty"` + TableServingUrl string `json:"table_serving_url,omitempty"` + UnityCatalogProvisioningState string `json:"unity_catalog_provisioning_state,omitempty"` + Spec *ResourceOnlineTableSpec `json:"spec,omitempty"` } diff --git a/bundle/internal/tf/schema/resource_pipeline.go b/bundle/internal/tf/schema/resource_pipeline.go index 1bed91fcb..2cb459aba 100644 --- a/bundle/internal/tf/schema/resource_pipeline.go +++ b/bundle/internal/tf/schema/resource_pipeline.go @@ -142,10 +142,26 @@ type ResourcePipelineGatewayDefinition struct { GatewayStorageSchema string `json:"gateway_storage_schema,omitempty"` } +type ResourcePipelineIngestionDefinitionObjectsReportTableConfiguration struct { + PrimaryKeys []string `json:"primary_keys,omitempty"` + SalesforceIncludeFormulaFields bool `json:"salesforce_include_formula_fields,omitempty"` + ScdType string `json:"scd_type,omitempty"` + SequenceBy []string `json:"sequence_by,omitempty"` +} + +type ResourcePipelineIngestionDefinitionObjectsReport struct { + DestinationCatalog string `json:"destination_catalog,omitempty"` + DestinationSchema string `json:"destination_schema,omitempty"` + DestinationTable string `json:"destination_table,omitempty"` + SourceUrl string `json:"source_url,omitempty"` + TableConfiguration *ResourcePipelineIngestionDefinitionObjectsReportTableConfiguration `json:"table_configuration,omitempty"` +} + type ResourcePipelineIngestionDefinitionObjectsSchemaTableConfiguration struct { PrimaryKeys []string `json:"primary_keys,omitempty"` SalesforceIncludeFormulaFields bool `json:"salesforce_include_formula_fields,omitempty"` ScdType string `json:"scd_type,omitempty"` + SequenceBy []string `json:"sequence_by,omitempty"` } type ResourcePipelineIngestionDefinitionObjectsSchema struct { @@ -160,6 +176,7 @@ type ResourcePipelineIngestionDefinitionObjectsTableTableConfiguration struct { PrimaryKeys []string `json:"primary_keys,omitempty"` SalesforceIncludeFormulaFields bool `json:"salesforce_include_formula_fields,omitempty"` ScdType string `json:"scd_type,omitempty"` + SequenceBy []string `json:"sequence_by,omitempty"` } type ResourcePipelineIngestionDefinitionObjectsTable struct { @@ -173,6 +190,7 @@ type ResourcePipelineIngestionDefinitionObjectsTable struct { } type ResourcePipelineIngestionDefinitionObjects struct { + Report *ResourcePipelineIngestionDefinitionObjectsReport `json:"report,omitempty"` Schema *ResourcePipelineIngestionDefinitionObjectsSchema `json:"schema,omitempty"` Table *ResourcePipelineIngestionDefinitionObjectsTable `json:"table,omitempty"` } @@ -181,6 +199,7 @@ type ResourcePipelineIngestionDefinitionTableConfiguration struct { PrimaryKeys []string `json:"primary_keys,omitempty"` SalesforceIncludeFormulaFields bool `json:"salesforce_include_formula_fields,omitempty"` ScdType string `json:"scd_type,omitempty"` + SequenceBy []string `json:"sequence_by,omitempty"` } type ResourcePipelineIngestionDefinition struct { diff --git a/bundle/internal/tf/schema/root.go b/bundle/internal/tf/schema/root.go index 7a0cc01f9..bf4283c9b 100644 --- a/bundle/internal/tf/schema/root.go +++ b/bundle/internal/tf/schema/root.go @@ -21,7 +21,7 @@ type Root struct { const ProviderHost = "registry.terraform.io" const ProviderSource = "databricks/databricks" -const ProviderVersion = "1.53.0" +const ProviderVersion = "1.54.0" func NewRoot() *Root { return &Root{ From 3bab21e72ee78f6711519762317e8218f9884afd Mon Sep 17 00:00:00 2001 From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com> Date: Tue, 22 Oct 2024 20:29:17 +0530 Subject: [PATCH 06/14] Fix race condition when restarting continuous jobs (#1849) ## Changes We don't need to cancel existing runs when the job is continuous and unpaused. The `/jobs/run-now` command will cancel the existing run and trigger a new one automatically. Cancelling the job manually can cause a race condition where both the manual trigger from the CLI and the continuous trigger from the job configuration happens at the same time. This PR prevents that from happening. ## Tests Unit tests and manually --- bundle/run/job.go | 23 +++++++ bundle/run/job_test.go | 132 ++++++++++++++++++++++++++++++++++++ bundle/run/pipeline.go | 12 ++++ bundle/run/pipeline_test.go | 70 +++++++++++++++++++ bundle/run/runner.go | 4 ++ cmd/bundle/run.go | 14 ++-- 6 files changed, 247 insertions(+), 8 deletions(-) diff --git a/bundle/run/job.go b/bundle/run/job.go index 8003c7d29..340af961c 100644 --- a/bundle/run/job.go +++ b/bundle/run/job.go @@ -317,6 +317,29 @@ func (r *jobRunner) Cancel(ctx context.Context) error { return errGroup.Wait() } +func (r *jobRunner) Restart(ctx context.Context, opts *Options) (output.RunOutput, error) { + // We don't need to cancel existing runs if the job is continuous and unpaused. + // the /jobs/run-now API will automatically cancel any existing runs before starting a new one. + // + // /jobs/run-now will not cancel existing runs if the job is continuous and paused. + // New job runs will be queued instead and will wait for existing runs to finish. + // In this case, we need to cancel the existing runs before starting a new one. + continuous := r.job.JobSettings.Continuous + if continuous != nil && continuous.PauseStatus == jobs.PauseStatusUnpaused { + return r.Run(ctx, opts) + } + + s := cmdio.Spinner(ctx) + s <- "Cancelling all active job runs" + err := r.Cancel(ctx) + close(s) + if err != nil { + return nil, err + } + + return r.Run(ctx, opts) +} + func (r *jobRunner) ParseArgs(args []string, opts *Options) error { return r.posArgsHandler().ParseArgs(args, opts) } diff --git a/bundle/run/job_test.go b/bundle/run/job_test.go index be189306b..369c546aa 100644 --- a/bundle/run/job_test.go +++ b/bundle/run/job_test.go @@ -1,6 +1,7 @@ package run import ( + "bytes" "context" "testing" "time" @@ -8,6 +9,8 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/flags" "github.com/databricks/databricks-sdk-go/experimental/mocks" "github.com/databricks/databricks-sdk-go/service/jobs" "github.com/stretchr/testify/mock" @@ -126,3 +129,132 @@ func TestJobRunnerCancelWithNoActiveRuns(t *testing.T) { err := runner.Cancel(context.Background()) require.NoError(t, err) } + +func TestJobRunnerRestart(t *testing.T) { + for _, jobSettings := range []*jobs.JobSettings{ + {}, + { + Continuous: &jobs.Continuous{ + PauseStatus: jobs.PauseStatusPaused, + }, + }, + } { + job := &resources.Job{ + ID: "123", + JobSettings: jobSettings, + } + b := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "test_job": job, + }, + }, + }, + } + + runner := jobRunner{key: "test", bundle: b, job: job} + + m := mocks.NewMockWorkspaceClient(t) + b.SetWorkpaceClient(m.WorkspaceClient) + ctx := context.Background() + ctx = cmdio.InContext(ctx, cmdio.NewIO(flags.OutputText, &bytes.Buffer{}, &bytes.Buffer{}, &bytes.Buffer{}, "", "")) + ctx = cmdio.NewContext(ctx, cmdio.NewLogger(flags.ModeAppend)) + + jobApi := m.GetMockJobsAPI() + jobApi.EXPECT().ListRunsAll(mock.Anything, jobs.ListRunsRequest{ + ActiveOnly: true, + JobId: 123, + }).Return([]jobs.BaseRun{ + {RunId: 1}, + {RunId: 2}, + }, nil) + + // Mock the runner cancelling existing job runs. + mockWait := &jobs.WaitGetRunJobTerminatedOrSkipped[struct{}]{ + Poll: func(time time.Duration, f func(j *jobs.Run)) (*jobs.Run, error) { + return nil, nil + }, + } + jobApi.EXPECT().CancelRun(mock.Anything, jobs.CancelRun{ + RunId: 1, + }).Return(mockWait, nil) + jobApi.EXPECT().CancelRun(mock.Anything, jobs.CancelRun{ + RunId: 2, + }).Return(mockWait, nil) + + // Mock the runner triggering a job run + mockWaitForRun := &jobs.WaitGetRunJobTerminatedOrSkipped[jobs.RunNowResponse]{ + Poll: func(d time.Duration, f func(*jobs.Run)) (*jobs.Run, error) { + return &jobs.Run{ + State: &jobs.RunState{ + ResultState: jobs.RunResultStateSuccess, + }, + }, nil + }, + } + jobApi.EXPECT().RunNow(mock.Anything, jobs.RunNow{ + JobId: 123, + }).Return(mockWaitForRun, nil) + + // Mock the runner getting the job output + jobApi.EXPECT().GetRun(mock.Anything, jobs.GetRunRequest{}).Return(&jobs.Run{}, nil) + + _, err := runner.Restart(ctx, &Options{}) + require.NoError(t, err) + } +} + +func TestJobRunnerRestartForContinuousUnpausedJobs(t *testing.T) { + job := &resources.Job{ + ID: "123", + JobSettings: &jobs.JobSettings{ + Continuous: &jobs.Continuous{ + PauseStatus: jobs.PauseStatusUnpaused, + }, + }, + } + b := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "test_job": job, + }, + }, + }, + } + + runner := jobRunner{key: "test", bundle: b, job: job} + + m := mocks.NewMockWorkspaceClient(t) + b.SetWorkpaceClient(m.WorkspaceClient) + ctx := context.Background() + ctx = cmdio.InContext(ctx, cmdio.NewIO(flags.OutputText, &bytes.Buffer{}, &bytes.Buffer{}, &bytes.Buffer{}, "", "...")) + ctx = cmdio.NewContext(ctx, cmdio.NewLogger(flags.ModeAppend)) + + jobApi := m.GetMockJobsAPI() + + // The runner should not try and cancel existing job runs for unpaused continuous jobs. + jobApi.AssertNotCalled(t, "ListRunsAll") + jobApi.AssertNotCalled(t, "CancelRun") + + // Mock the runner triggering a job run + mockWaitForRun := &jobs.WaitGetRunJobTerminatedOrSkipped[jobs.RunNowResponse]{ + Poll: func(d time.Duration, f func(*jobs.Run)) (*jobs.Run, error) { + return &jobs.Run{ + State: &jobs.RunState{ + ResultState: jobs.RunResultStateSuccess, + }, + }, nil + }, + } + jobApi.EXPECT().RunNow(mock.Anything, jobs.RunNow{ + JobId: 123, + }).Return(mockWaitForRun, nil) + + // Mock the runner getting the job output + jobApi.EXPECT().GetRun(mock.Anything, jobs.GetRunRequest{}).Return(&jobs.Run{}, nil) + + _, err := runner.Restart(ctx, &Options{}) + require.NoError(t, err) +} diff --git a/bundle/run/pipeline.go b/bundle/run/pipeline.go index d684f8388..ffe012843 100644 --- a/bundle/run/pipeline.go +++ b/bundle/run/pipeline.go @@ -183,6 +183,18 @@ func (r *pipelineRunner) Cancel(ctx context.Context) error { return err } +func (r *pipelineRunner) Restart(ctx context.Context, opts *Options) (output.RunOutput, error) { + s := cmdio.Spinner(ctx) + s <- "Cancelling the active pipeline update" + err := r.Cancel(ctx) + close(s) + if err != nil { + return nil, err + } + + return r.Run(ctx, opts) +} + func (r *pipelineRunner) ParseArgs(args []string, opts *Options) error { if len(args) == 0 { return nil diff --git a/bundle/run/pipeline_test.go b/bundle/run/pipeline_test.go index 29b57ffdb..e4608061c 100644 --- a/bundle/run/pipeline_test.go +++ b/bundle/run/pipeline_test.go @@ -1,6 +1,7 @@ package run import ( + "bytes" "context" "testing" "time" @@ -8,8 +9,12 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/flags" + sdk_config "github.com/databricks/databricks-sdk-go/config" "github.com/databricks/databricks-sdk-go/experimental/mocks" "github.com/databricks/databricks-sdk-go/service/pipelines" + "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" ) @@ -47,3 +52,68 @@ func TestPipelineRunnerCancel(t *testing.T) { err := runner.Cancel(context.Background()) require.NoError(t, err) } + +func TestPipelineRunnerRestart(t *testing.T) { + pipeline := &resources.Pipeline{ + ID: "123", + } + + b := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Pipelines: map[string]*resources.Pipeline{ + "test_pipeline": pipeline, + }, + }, + }, + } + + runner := pipelineRunner{key: "test", bundle: b, pipeline: pipeline} + + m := mocks.NewMockWorkspaceClient(t) + m.WorkspaceClient.Config = &sdk_config.Config{ + Host: "https://test.com", + } + b.SetWorkpaceClient(m.WorkspaceClient) + ctx := context.Background() + ctx = cmdio.InContext(ctx, cmdio.NewIO(flags.OutputText, &bytes.Buffer{}, &bytes.Buffer{}, &bytes.Buffer{}, "", "...")) + ctx = cmdio.NewContext(ctx, cmdio.NewLogger(flags.ModeAppend)) + + mockWait := &pipelines.WaitGetPipelineIdle[struct{}]{ + Poll: func(time.Duration, func(*pipelines.GetPipelineResponse)) (*pipelines.GetPipelineResponse, error) { + return nil, nil + }, + } + + pipelineApi := m.GetMockPipelinesAPI() + pipelineApi.EXPECT().Stop(mock.Anything, pipelines.StopRequest{ + PipelineId: "123", + }).Return(mockWait, nil) + + pipelineApi.EXPECT().GetByPipelineId(mock.Anything, "123").Return(&pipelines.GetPipelineResponse{}, nil) + + // Mock runner starting a new update + pipelineApi.EXPECT().StartUpdate(mock.Anything, pipelines.StartUpdate{ + PipelineId: "123", + }).Return(&pipelines.StartUpdateResponse{ + UpdateId: "456", + }, nil) + + // Mock runner polling for events + pipelineApi.EXPECT().ListPipelineEventsAll(mock.Anything, pipelines.ListPipelineEventsRequest{ + Filter: `update_id = '456'`, + MaxResults: 100, + PipelineId: "123", + }).Return([]pipelines.PipelineEvent{}, nil) + + // Mock runner polling for update status + pipelineApi.EXPECT().GetUpdateByPipelineIdAndUpdateId(mock.Anything, "123", "456"). + Return(&pipelines.GetUpdateResponse{ + Update: &pipelines.UpdateInfo{ + State: pipelines.UpdateInfoStateCompleted, + }, + }, nil) + + _, err := runner.Restart(ctx, &Options{}) + require.NoError(t, err) +} diff --git a/bundle/run/runner.go b/bundle/run/runner.go index 0f202ce7d..1cdcc9d8b 100644 --- a/bundle/run/runner.go +++ b/bundle/run/runner.go @@ -27,6 +27,10 @@ type Runner interface { // Run the underlying worklow. Run(ctx context.Context, opts *Options) (output.RunOutput, error) + // Restart the underlying workflow by cancelling any existing runs before + // starting a new one. + Restart(ctx context.Context, opts *Options) (output.RunOutput, error) + // Cancel the underlying workflow. Cancel(ctx context.Context) error diff --git a/cmd/bundle/run.go b/cmd/bundle/run.go index 9ef5eb8ff..ed5bd2ef1 100644 --- a/cmd/bundle/run.go +++ b/cmd/bundle/run.go @@ -8,6 +8,7 @@ import ( "github.com/databricks/cli/bundle/deploy/terraform" "github.com/databricks/cli/bundle/phases" "github.com/databricks/cli/bundle/run" + "github.com/databricks/cli/bundle/run/output" "github.com/databricks/cli/cmd/bundle/utils" "github.com/databricks/cli/cmd/root" "github.com/databricks/cli/libs/cmdio" @@ -100,19 +101,16 @@ task or a Python wheel task, the second example applies. } runOptions.NoWait = noWait + var output output.RunOutput if restart { - s := cmdio.Spinner(ctx) - s <- "Cancelling all runs" - err := runner.Cancel(ctx) - close(s) - if err != nil { - return err - } + output, err = runner.Restart(ctx, &runOptions) + } else { + output, err = runner.Run(ctx, &runOptions) } - output, err := runner.Run(ctx, &runOptions) if err != nil { return err } + if output != nil { switch root.OutputType(cmd) { case flags.OutputText: From 60c153c0e765e6a7fd53e20f2ce431b7f3a70812 Mon Sep 17 00:00:00 2001 From: "Lennart Kats (databricks)" Date: Tue, 22 Oct 2024 17:52:46 +0200 Subject: [PATCH 07/14] Fix pipeline in default-python template not working for certain workspaces (#1854) Change the default-python template to not set the `catalog` field for the pipeline for workspaces that set `hive_metastore` as the default catalog. The Pipelines service currently returns an error when that value is used for the `catalog` field. This is the most simple fix for this issue, which was reported by a customer. As a followup, we should look at whether we want to prompt for a catalog instead, possibly just for this specific scenario. --- .../resources/{{.project_name}}.pipeline.yml.tmpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl index 50e5ad97c..1c6b8607e 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl @@ -3,7 +3,7 @@ resources: pipelines: {{.project_name}}_pipeline: name: {{.project_name}}_pipeline - {{- if eq default_catalog ""}} + {{- if or (eq default_catalog "") (eq default_catalog "hive_metastore")}} ## Specify the 'catalog' field to configure this pipeline to make use of Unity Catalog: # catalog: catalog_name {{- else}} From 55a055d0f532c8ab60bdbccc51c106fb051520d8 Mon Sep 17 00:00:00 2001 From: Ilia Babanov Date: Wed, 23 Oct 2024 13:08:12 +0200 Subject: [PATCH 08/14] Add "output" flag to the bundle sync command (#1853) ## Changes We want to use 'bundle sync' in the vscode extension before running a file as an ad-hoc job (or through the context api). Right now we use bundle deploy in these cases, but deploying bundle resources is not always expected when you just want to quickly run a file. Sync makes more sense in these cases, but we still want to have verbose output to see what's happening. In the 'deploy' command we have hidden 'verbose' flag. For the sync I've just added 'output' flag, handling both json and text cases, similar to how it's done in the non-bundle `sync` command. The flag is not hidden (although we still don't show any output by default, if the flag is not set). VSCode Extension PR: https://github.com/databricks/databricks-vscode/pull/1401 ## Tests Manually --- cmd/bundle/sync.go | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/cmd/bundle/sync.go b/cmd/bundle/sync.go index df3e087c2..274bba0e0 100644 --- a/cmd/bundle/sync.go +++ b/cmd/bundle/sync.go @@ -1,7 +1,9 @@ package bundle import ( + "context" "fmt" + "io" "time" "github.com/databricks/cli/bundle" @@ -9,6 +11,7 @@ import ( "github.com/databricks/cli/bundle/phases" "github.com/databricks/cli/cmd/bundle/utils" "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/libs/flags" "github.com/databricks/cli/libs/log" "github.com/databricks/cli/libs/sync" "github.com/spf13/cobra" @@ -18,6 +21,7 @@ type syncFlags struct { interval time.Duration full bool watch bool + output flags.Output } func (f *syncFlags) syncOptionsFromBundle(cmd *cobra.Command, b *bundle.Bundle) (*sync.SyncOptions, error) { @@ -26,6 +30,21 @@ func (f *syncFlags) syncOptionsFromBundle(cmd *cobra.Command, b *bundle.Bundle) return nil, fmt.Errorf("cannot get sync options: %w", err) } + if f.output != "" { + var outputFunc func(context.Context, <-chan sync.Event, io.Writer) + switch f.output { + case flags.OutputText: + outputFunc = sync.TextOutput + case flags.OutputJSON: + outputFunc = sync.JsonOutput + } + if outputFunc != nil { + opts.OutputHandler = func(ctx context.Context, c <-chan sync.Event) { + outputFunc(ctx, c, cmd.OutOrStdout()) + } + } + } + opts.Full = f.full opts.PollInterval = f.interval return opts, nil @@ -42,6 +61,7 @@ func newSyncCommand() *cobra.Command { cmd.Flags().DurationVar(&f.interval, "interval", 1*time.Second, "file system polling interval (for --watch)") cmd.Flags().BoolVar(&f.full, "full", false, "perform full synchronization (default is incremental)") cmd.Flags().BoolVar(&f.watch, "watch", false, "watch local file system for changes") + cmd.Flags().Var(&f.output, "output", "type of the output format") cmd.RunE = func(cmd *cobra.Command, args []string) error { ctx := cmd.Context() @@ -65,6 +85,7 @@ func newSyncCommand() *cobra.Command { if err != nil { return err } + defer s.Close() log.Infof(ctx, "Remote file sync location: %v", opts.RemotePath) From ab622e65bbf8d90dcc8f363a978f871bb04554af Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Wed, 23 Oct 2024 16:08:27 +0200 Subject: [PATCH 09/14] [Release] Release v0.231.0 (#1856) CLI: * Added JSON input validation for CLI commands ([#1771](https://github.com/databricks/cli/pull/1771)). * Support Git worktrees for `sync` ([#1831](https://github.com/databricks/cli/pull/1831)). Bundles: * Add `bundle summary` to display URLs for deployed resources ([#1731](https://github.com/databricks/cli/pull/1731)). * Added a warning when incorrect permissions used for `/Workspace/Shared` bundle root ([#1821](https://github.com/databricks/cli/pull/1821)). * Show actionable errors for collaborative deployment scenarios ([#1386](https://github.com/databricks/cli/pull/1386)). * Fix path to repository-wide exclude file ([#1837](https://github.com/databricks/cli/pull/1837)). * Fixed typo in converting cluster permissions ([#1826](https://github.com/databricks/cli/pull/1826)). * Ignore metastore permission error during template generation ([#1819](https://github.com/databricks/cli/pull/1819)). * Handle normalization of `dyn.KindTime` into an any type ([#1836](https://github.com/databricks/cli/pull/1836)). * Added support for pip options in environment dependencies ([#1842](https://github.com/databricks/cli/pull/1842)). * Fix race condition when restarting continuous jobs ([#1849](https://github.com/databricks/cli/pull/1849)). * Fix pipeline in default-python template not working for certain workspaces ([#1854](https://github.com/databricks/cli/pull/1854)). * Add "output" flag to the bundle sync command ([#1853](https://github.com/databricks/cli/pull/1853)). Internal: * Move utility functions dealing with IAM to libs/iamutil ([#1820](https://github.com/databricks/cli/pull/1820)). * Remove unused `IS_OWNER` constant ([#1823](https://github.com/databricks/cli/pull/1823)). * Assert SDK version is consistent in the CLI generation process ([#1814](https://github.com/databricks/cli/pull/1814)). * Fixed unmarshalling json input into `interface{}` type ([#1832](https://github.com/databricks/cli/pull/1832)). * Fix `TestAccFsMkdirWhenFileExistsAtPath` in isolated Azure environments ([#1833](https://github.com/databricks/cli/pull/1833)). * Add behavioral tests for examples from the YAML spec ([#1835](https://github.com/databricks/cli/pull/1835)). * Remove Terraform conversion function that's no longer used ([#1840](https://github.com/databricks/cli/pull/1840)). * Encode assumptions about the dashboards API in a test ([#1839](https://github.com/databricks/cli/pull/1839)). * Add script to make testing of code on branches easier ([#1844](https://github.com/databricks/cli/pull/1844)). API Changes: * Added `databricks disable-legacy-dbfs` command group. OpenAPI commit cf9c61453990df0f9453670f2fe68e1b128647a2 (2024-10-14) Dependency updates: * Upgrade TF provider to 1.54.0 ([#1852](https://github.com/databricks/cli/pull/1852)). * Bump github.com/databricks/databricks-sdk-go from 0.48.0 to 0.49.0 ([#1843](https://github.com/databricks/cli/pull/1843)). --- CHANGELOG.md | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f31bb10b2..863474934 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,43 @@ # Version changelog +## [Release] Release v0.231.0 + +CLI: + * Added JSON input validation for CLI commands ([#1771](https://github.com/databricks/cli/pull/1771)). + * Support Git worktrees for `sync` ([#1831](https://github.com/databricks/cli/pull/1831)). + +Bundles: + * Add `bundle summary` to display URLs for deployed resources ([#1731](https://github.com/databricks/cli/pull/1731)). + * Added a warning when incorrect permissions used for `/Workspace/Shared` bundle root ([#1821](https://github.com/databricks/cli/pull/1821)). + * Show actionable errors for collaborative deployment scenarios ([#1386](https://github.com/databricks/cli/pull/1386)). + * Fix path to repository-wide exclude file ([#1837](https://github.com/databricks/cli/pull/1837)). + * Fixed typo in converting cluster permissions ([#1826](https://github.com/databricks/cli/pull/1826)). + * Ignore metastore permission error during template generation ([#1819](https://github.com/databricks/cli/pull/1819)). + * Handle normalization of `dyn.KindTime` into an any type ([#1836](https://github.com/databricks/cli/pull/1836)). + * Added support for pip options in environment dependencies ([#1842](https://github.com/databricks/cli/pull/1842)). + * Fix race condition when restarting continuous jobs ([#1849](https://github.com/databricks/cli/pull/1849)). + * Fix pipeline in default-python template not working for certain workspaces ([#1854](https://github.com/databricks/cli/pull/1854)). + * Add "output" flag to the bundle sync command ([#1853](https://github.com/databricks/cli/pull/1853)). + +Internal: + * Move utility functions dealing with IAM to libs/iamutil ([#1820](https://github.com/databricks/cli/pull/1820)). + * Remove unused `IS_OWNER` constant ([#1823](https://github.com/databricks/cli/pull/1823)). + * Assert SDK version is consistent in the CLI generation process ([#1814](https://github.com/databricks/cli/pull/1814)). + * Fixed unmarshalling json input into `interface{}` type ([#1832](https://github.com/databricks/cli/pull/1832)). + * Fix `TestAccFsMkdirWhenFileExistsAtPath` in isolated Azure environments ([#1833](https://github.com/databricks/cli/pull/1833)). + * Add behavioral tests for examples from the YAML spec ([#1835](https://github.com/databricks/cli/pull/1835)). + * Remove Terraform conversion function that's no longer used ([#1840](https://github.com/databricks/cli/pull/1840)). + * Encode assumptions about the dashboards API in a test ([#1839](https://github.com/databricks/cli/pull/1839)). + * Add script to make testing of code on branches easier ([#1844](https://github.com/databricks/cli/pull/1844)). + +API Changes: + * Added `databricks disable-legacy-dbfs` command group. + +OpenAPI commit cf9c61453990df0f9453670f2fe68e1b128647a2 (2024-10-14) +Dependency updates: + * Upgrade TF provider to 1.54.0 ([#1852](https://github.com/databricks/cli/pull/1852)). + * Bump github.com/databricks/databricks-sdk-go from 0.48.0 to 0.49.0 ([#1843](https://github.com/databricks/cli/pull/1843)). + ## [Release] Release v0.230.0 Notable changes for Databricks Asset Bundles: From eddaddaf8b6773b6afcd2ab86ad483bdaca810fe Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Thu, 24 Oct 2024 14:03:12 +0200 Subject: [PATCH 10/14] Attempt to reduce test flakiness on Windows (#1845) ## Changes Test failures indicate that both stdout and stderr are consumed, yet the content of stdout doesn't end up in the intended output. This can happen if the goroutines responsible for writing to the combined output buffer attempt to write to the same underlying buffer concurrently. Example failure: ``` === RUN TestBackgroundCombinedOutput background_test.go:65: Error Trace: D:/a/cli/cli/libs/process/background_test.go:65 Error: elements differ extra elements in list A: ([]interface {}) (len=1) { (string) (len=1) "2" } listA: ([]string) (len=2) { (string) (len=1) "1", (string) (len=1) "2" } listB: ([]string) (len=1) { (string) (len=1) "1" } Test: TestBackgroundCombinedOutput ``` With the test body: https://github.com/databricks/cli/blob/ca45e53f42c5c4b26f2833554ab7118802c017cb/libs/process/background_test.go#L48-L66 With the implementation of `WithCombinedOutput`: https://github.com/databricks/cli/blob/ca45e53f42c5c4b26f2833554ab7118802c017cb/libs/process/opts.go#L72-L78 Notice that `c.Stdout` does get the "2", or the test failure would have included the relevant assertion error. This leads me to believe that there is a race on writing to `buf` from the two goroutines writing to `c.Stdout` and `c.Stderr`. ## Tests The test passes. If this PR has the intended effect remains to be seen... --- libs/process/opts.go | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/libs/process/opts.go b/libs/process/opts.go index 9516e49ba..dd0667516 100644 --- a/libs/process/opts.go +++ b/libs/process/opts.go @@ -6,6 +6,7 @@ import ( "fmt" "io" "os/exec" + "sync" ) type execOption func(context.Context, *exec.Cmd) error @@ -69,10 +70,24 @@ func WithStdoutWriter(dst io.Writer) execOption { } } +// safeWriter is a writer that is safe to use concurrently. +// It serializes writes to the underlying writer. +type safeWriter struct { + w io.Writer + m sync.Mutex +} + +func (s *safeWriter) Write(p []byte) (n int, err error) { + s.m.Lock() + defer s.m.Unlock() + return s.w.Write(p) +} + func WithCombinedOutput(buf *bytes.Buffer) execOption { + sw := &safeWriter{w: buf} return func(_ context.Context, c *exec.Cmd) error { - c.Stdout = io.MultiWriter(buf, c.Stdout) - c.Stderr = io.MultiWriter(buf, c.Stderr) + c.Stdout = io.MultiWriter(sw, c.Stdout) + c.Stderr = io.MultiWriter(sw, c.Stderr) return nil } } From 89ee7d8a99e067220d48d458180806e3540ca884 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Thu, 24 Oct 2024 14:20:33 +0200 Subject: [PATCH 11/14] Add command to open a resource in the browser (#1846) ## Changes This builds on the functionality added in #1731 that produces a URL for every resource. Adds `bundle/resources` package to deal with resource lookups and command completion. The new functionality is similar to the lookup and command completion functionality located in `bundle/run`. It differs in that it doesn't gracefully deal with ambiguous references to resources, now that we explicitly validate this doesn't occur in the bundle configuration. It still allows resources to be looked up with their fully qualified key, `.`. ## Tests * Added unit tests for resource lookup and completion * Manually confirmed that `bundle open` prompts, accepts a key argument, and opens a browser --- bundle/resources/completion.go | 17 ++++ bundle/resources/completion_test.go | 32 +++++++ bundle/resources/lookup.go | 69 +++++++++++++ bundle/resources/lookup_test.go | 88 +++++++++++++++++ cmd/bundle/bundle.go | 1 + cmd/bundle/open.go | 144 ++++++++++++++++++++++++++++ 6 files changed, 351 insertions(+) create mode 100644 bundle/resources/completion.go create mode 100644 bundle/resources/completion_test.go create mode 100644 bundle/resources/lookup.go create mode 100644 bundle/resources/lookup_test.go create mode 100644 cmd/bundle/open.go diff --git a/bundle/resources/completion.go b/bundle/resources/completion.go new file mode 100644 index 000000000..3ce0510a9 --- /dev/null +++ b/bundle/resources/completion.go @@ -0,0 +1,17 @@ +package resources + +import "github.com/databricks/cli/bundle" + +// Completions returns the same as [References] except +// that every key maps directly to a single reference. +func Completions(b *bundle.Bundle) map[string]Reference { + out := make(map[string]Reference) + keyOnlyRefs, _ := References(b) + for k, refs := range keyOnlyRefs { + if len(refs) != 1 { + continue + } + out[k] = refs[0] + } + return out +} diff --git a/bundle/resources/completion_test.go b/bundle/resources/completion_test.go new file mode 100644 index 000000000..36ad1a06a --- /dev/null +++ b/bundle/resources/completion_test.go @@ -0,0 +1,32 @@ +package resources + +import ( + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/resources" + "github.com/stretchr/testify/assert" +) + +func TestCompletions_SkipDuplicates(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "foo": {}, + "bar": {}, + }, + Pipelines: map[string]*resources.Pipeline{ + "foo": {}, + }, + }, + }, + } + + // Test that this skips duplicates and only includes unambiguous completions. + out := Completions(b) + if assert.Len(t, out, 1) { + assert.Contains(t, out, "bar") + } +} diff --git a/bundle/resources/lookup.go b/bundle/resources/lookup.go new file mode 100644 index 000000000..74aec531a --- /dev/null +++ b/bundle/resources/lookup.go @@ -0,0 +1,69 @@ +package resources + +import ( + "fmt" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" +) + +// Reference is a reference to a resource. +// It includes the resource type description, and a reference to the resource itself. +type Reference struct { + Description config.ResourceDescription + Resource config.ConfigResource +} + +// Map is the core type for resource lookup and completion. +type Map map[string][]Reference + +// References returns maps of resource keys to a slice of [Reference]. +// +// The first map is indexed by the resource key only. +// The second map is indexed by the resource type name and its key. +// +// While the return types allows for multiple resources to share the same key, +// this is confirmed not to happen in the [validate.UniqueResourceKeys] mutator. +func References(b *bundle.Bundle) (Map, Map) { + keyOnly := make(Map) + keyWithType := make(Map) + + // Collect map of resource references indexed by their keys. + for _, group := range b.Config.Resources.AllResources() { + for k, v := range group.Resources { + ref := Reference{ + Description: group.Description, + Resource: v, + } + + kt := fmt.Sprintf("%s.%s", group.Description.PluralName, k) + keyOnly[k] = append(keyOnly[k], ref) + keyWithType[kt] = append(keyWithType[kt], ref) + } + } + + return keyOnly, keyWithType +} + +// Lookup returns the resource with the specified key. +// If the key maps to more than one resource, an error is returned. +// If the key does not map to any resource, an error is returned. +func Lookup(b *bundle.Bundle, key string) (Reference, error) { + keyOnlyRefs, keyWithTypeRefs := References(b) + refs, ok := keyOnlyRefs[key] + if !ok { + refs, ok = keyWithTypeRefs[key] + if !ok { + return Reference{}, fmt.Errorf("resource with key %q not found", key) + } + } + + switch { + case len(refs) == 1: + return refs[0], nil + case len(refs) > 1: + return Reference{}, fmt.Errorf("multiple resources with key %q found", key) + default: + panic("unreachable") + } +} diff --git a/bundle/resources/lookup_test.go b/bundle/resources/lookup_test.go new file mode 100644 index 000000000..d2092c23d --- /dev/null +++ b/bundle/resources/lookup_test.go @@ -0,0 +1,88 @@ +package resources + +import ( + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/databricks-sdk-go/service/jobs" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestLookup_EmptyBundle(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{}, + }, + } + + _, err := Lookup(b, "foo") + require.Error(t, err) + assert.ErrorContains(t, err, "resource with key \"foo\" not found") +} + +func TestLookup_NotFound(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "foo": {}, + "bar": {}, + }, + }, + }, + } + + _, err := Lookup(b, "qux") + require.Error(t, err) + assert.ErrorContains(t, err, `resource with key "qux" not found`) +} + +func TestLookup_MultipleFound(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "foo": {}, + }, + Pipelines: map[string]*resources.Pipeline{ + "foo": {}, + }, + }, + }, + } + + _, err := Lookup(b, "foo") + require.Error(t, err) + assert.ErrorContains(t, err, `multiple resources with key "foo" found`) +} + +func TestLookup_Nominal(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "foo": { + JobSettings: &jobs.JobSettings{ + Name: "Foo job", + }, + }, + }, + }, + }, + } + + // Lookup by key only. + out, err := Lookup(b, "foo") + if assert.NoError(t, err) { + assert.Equal(t, "Foo job", out.Resource.GetName()) + } + + // Lookup by type and key. + out, err = Lookup(b, "jobs.foo") + if assert.NoError(t, err) { + assert.Equal(t, "Foo job", out.Resource.GetName()) + } +} diff --git a/cmd/bundle/bundle.go b/cmd/bundle/bundle.go index 0880c9c44..fb88cd7d0 100644 --- a/cmd/bundle/bundle.go +++ b/cmd/bundle/bundle.go @@ -27,5 +27,6 @@ func New() *cobra.Command { cmd.AddCommand(newGenerateCommand()) cmd.AddCommand(newDebugCommand()) cmd.AddCommand(deployment.NewDeploymentCommand()) + cmd.AddCommand(newOpenCommand()) return cmd } diff --git a/cmd/bundle/open.go b/cmd/bundle/open.go new file mode 100644 index 000000000..a2ad32fd8 --- /dev/null +++ b/cmd/bundle/open.go @@ -0,0 +1,144 @@ +package bundle + +import ( + "context" + "errors" + "fmt" + "os" + "path/filepath" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config/mutator" + "github.com/databricks/cli/bundle/deploy/terraform" + "github.com/databricks/cli/bundle/phases" + "github.com/databricks/cli/bundle/resources" + "github.com/databricks/cli/cmd/bundle/utils" + "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/libs/cmdio" + "github.com/spf13/cobra" + "golang.org/x/exp/maps" + + "github.com/pkg/browser" +) + +func promptOpenArgument(ctx context.Context, b *bundle.Bundle) (string, error) { + // Compute map of "Human readable name of resource" -> "resource key". + inv := make(map[string]string) + for k, ref := range resources.Completions(b) { + title := fmt.Sprintf("%s: %s", ref.Description.SingularTitle, ref.Resource.GetName()) + inv[title] = k + } + + key, err := cmdio.Select(ctx, inv, "Resource to open") + if err != nil { + return "", err + } + + return key, nil +} + +func resolveOpenArgument(ctx context.Context, b *bundle.Bundle, args []string) (string, error) { + // If no arguments are specified, prompt the user to select the resource to open. + if len(args) == 0 && cmdio.IsPromptSupported(ctx) { + return promptOpenArgument(ctx, b) + } + + if len(args) < 1 { + return "", fmt.Errorf("expected a KEY of the resource to open") + } + + return args[0], nil +} + +func newOpenCommand() *cobra.Command { + cmd := &cobra.Command{ + Use: "open", + Short: "Open a resource in the browser", + Args: root.MaximumNArgs(1), + } + + var forcePull bool + cmd.Flags().BoolVar(&forcePull, "force-pull", false, "Skip local cache and load the state from the remote workspace") + + cmd.RunE = func(cmd *cobra.Command, args []string) error { + ctx := cmd.Context() + b, diags := utils.ConfigureBundleWithVariables(cmd) + if err := diags.Error(); err != nil { + return diags.Error() + } + + diags = bundle.Apply(ctx, b, phases.Initialize()) + if err := diags.Error(); err != nil { + return err + } + + arg, err := resolveOpenArgument(ctx, b, args) + if err != nil { + return err + } + + cacheDir, err := terraform.Dir(ctx, b) + if err != nil { + return err + } + _, stateFileErr := os.Stat(filepath.Join(cacheDir, terraform.TerraformStateFileName)) + _, configFileErr := os.Stat(filepath.Join(cacheDir, terraform.TerraformConfigFileName)) + noCache := errors.Is(stateFileErr, os.ErrNotExist) || errors.Is(configFileErr, os.ErrNotExist) + + if forcePull || noCache { + diags = bundle.Apply(ctx, b, bundle.Seq( + terraform.StatePull(), + terraform.Interpolate(), + terraform.Write(), + )) + if err := diags.Error(); err != nil { + return err + } + } + + diags = bundle.Apply(ctx, b, bundle.Seq( + terraform.Load(), + mutator.InitializeURLs(), + )) + if err := diags.Error(); err != nil { + return err + } + + // Locate resource to open. + ref, err := resources.Lookup(b, arg) + if err != nil { + return err + } + + // Confirm that the resource has a URL. + url := ref.Resource.GetURL() + if url == "" { + return fmt.Errorf("resource does not have a URL associated with it (has it been deployed?)") + } + + return browser.OpenURL(url) + } + + cmd.ValidArgsFunction = func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) { + b, diags := root.MustConfigureBundle(cmd) + if err := diags.Error(); err != nil { + cobra.CompErrorln(err.Error()) + return nil, cobra.ShellCompDirectiveError + } + + // No completion in the context of a bundle. + // Source and destination paths are taken from bundle configuration. + if b == nil { + return nil, cobra.ShellCompDirectiveNoFileComp + } + + if len(args) == 0 { + completions := resources.Completions(b) + return maps.Keys(completions), cobra.ShellCompDirectiveNoFileComp + } else { + return nil, cobra.ShellCompDirectiveNoFileComp + } + } + + return cmd +} From eaea308254556ff2ce37d06a98b8af6b93af482c Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Thu, 24 Oct 2024 14:36:17 +0200 Subject: [PATCH 12/14] Added validator for folder permissions (#1824) ## Changes This validator checks permissions defined in top-level bundle config and permissions set in workspace for the folders bundle is deployed to. It raises the warning if the permissions defined in the workspace are not defined in bundle. This validator is executed only during `bundle validate` command. ## Tests ``` Warning: untracked permissions apply to target workspace path The following permissions apply to the workspace folder at "/Workspace/Users/andrew.nester@databricks.com/.bundle/clusters/default" but are not configured in the bundle: - level: CAN_MANAGE, user_name: andrew.nester@databricks.com ``` --------- Co-authored-by: Pieter Noordhuis --- bundle/config/resources/permission.go | 18 ++ bundle/config/validate/folder_permissions.go | 126 +++++++++++ .../validate/folder_permissions_test.go | 208 ++++++++++++++++++ bundle/config/validate/validate.go | 1 + .../libraries/{workspace_path.go => path.go} | 9 + .../{workspace_path_test.go => path_test.go} | 10 + bundle/permissions/validate.go | 8 +- .../permissions/workspace_path_permissions.go | 89 ++++++++ .../workspace_path_permissions_test.go | 121 ++++++++++ bundle/permissions/workspace_root.go | 4 +- 10 files changed, 586 insertions(+), 8 deletions(-) create mode 100644 bundle/config/validate/folder_permissions.go create mode 100644 bundle/config/validate/folder_permissions_test.go rename bundle/libraries/{workspace_path.go => path.go} (78%) rename bundle/libraries/{workspace_path_test.go => path_test.go} (77%) create mode 100644 bundle/permissions/workspace_path_permissions.go create mode 100644 bundle/permissions/workspace_path_permissions_test.go diff --git a/bundle/config/resources/permission.go b/bundle/config/resources/permission.go index fa2d8796c..62e18a09e 100644 --- a/bundle/config/resources/permission.go +++ b/bundle/config/resources/permission.go @@ -1,5 +1,7 @@ package resources +import "fmt" + // Permission holds the permission level setting for a single principal. // Multiple of these can be defined on any resource. type Permission struct { @@ -9,3 +11,19 @@ type Permission struct { ServicePrincipalName string `json:"service_principal_name,omitempty"` GroupName string `json:"group_name,omitempty"` } + +func (p Permission) String() string { + if p.UserName != "" { + return fmt.Sprintf("level: %s, user_name: %s", p.Level, p.UserName) + } + + if p.ServicePrincipalName != "" { + return fmt.Sprintf("level: %s, service_principal_name: %s", p.Level, p.ServicePrincipalName) + } + + if p.GroupName != "" { + return fmt.Sprintf("level: %s, group_name: %s", p.Level, p.GroupName) + } + + return fmt.Sprintf("level: %s", p.Level) +} diff --git a/bundle/config/validate/folder_permissions.go b/bundle/config/validate/folder_permissions.go new file mode 100644 index 000000000..a376bd776 --- /dev/null +++ b/bundle/config/validate/folder_permissions.go @@ -0,0 +1,126 @@ +package validate + +import ( + "context" + "fmt" + "path" + "strings" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/libraries" + "github.com/databricks/cli/bundle/permissions" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/databricks-sdk-go/apierr" + "github.com/databricks/databricks-sdk-go/service/workspace" + "golang.org/x/sync/errgroup" +) + +type folderPermissions struct { +} + +// Apply implements bundle.ReadOnlyMutator. +func (f *folderPermissions) Apply(ctx context.Context, b bundle.ReadOnlyBundle) diag.Diagnostics { + if len(b.Config().Permissions) == 0 { + return nil + } + + rootPath := b.Config().Workspace.RootPath + paths := []string{} + if !libraries.IsVolumesPath(rootPath) && !libraries.IsWorkspaceSharedPath(rootPath) { + paths = append(paths, rootPath) + } + + if !strings.HasSuffix(rootPath, "/") { + rootPath += "/" + } + + for _, p := range []string{ + b.Config().Workspace.ArtifactPath, + b.Config().Workspace.FilePath, + b.Config().Workspace.StatePath, + b.Config().Workspace.ResourcePath, + } { + if libraries.IsWorkspaceSharedPath(p) || libraries.IsVolumesPath(p) { + continue + } + + if strings.HasPrefix(p, rootPath) { + continue + } + + paths = append(paths, p) + } + + var diags diag.Diagnostics + g, ctx := errgroup.WithContext(ctx) + results := make([]diag.Diagnostics, len(paths)) + for i, p := range paths { + g.Go(func() error { + results[i] = checkFolderPermission(ctx, b, p) + return nil + }) + } + + if err := g.Wait(); err != nil { + return diag.FromErr(err) + } + + for _, r := range results { + diags = diags.Extend(r) + } + + return diags +} + +func checkFolderPermission(ctx context.Context, b bundle.ReadOnlyBundle, folderPath string) diag.Diagnostics { + w := b.WorkspaceClient().Workspace + obj, err := getClosestExistingObject(ctx, w, folderPath) + if err != nil { + return diag.FromErr(err) + } + + objPermissions, err := w.GetPermissions(ctx, workspace.GetWorkspaceObjectPermissionsRequest{ + WorkspaceObjectId: fmt.Sprint(obj.ObjectId), + WorkspaceObjectType: "directories", + }) + if err != nil { + return diag.FromErr(err) + } + + p := permissions.ObjectAclToResourcePermissions(folderPath, objPermissions.AccessControlList) + return p.Compare(b.Config().Permissions) +} + +func getClosestExistingObject(ctx context.Context, w workspace.WorkspaceInterface, folderPath string) (*workspace.ObjectInfo, error) { + for { + obj, err := w.GetStatusByPath(ctx, folderPath) + if err == nil { + return obj, nil + } + + if !apierr.IsMissing(err) { + return nil, err + } + + parent := path.Dir(folderPath) + // If the parent is the same as the current folder, then we have reached the root + if folderPath == parent { + break + } + + folderPath = parent + } + + return nil, fmt.Errorf("folder %s and its parent folders do not exist", folderPath) +} + +// Name implements bundle.ReadOnlyMutator. +func (f *folderPermissions) Name() string { + return "validate:folder_permissions" +} + +// ValidateFolderPermissions validates that permissions for the folders in Workspace file system matches +// the permissions in the top-level permissions section of the bundle. +func ValidateFolderPermissions() bundle.ReadOnlyMutator { + return &folderPermissions{} +} diff --git a/bundle/config/validate/folder_permissions_test.go b/bundle/config/validate/folder_permissions_test.go new file mode 100644 index 000000000..8e68c9fbf --- /dev/null +++ b/bundle/config/validate/folder_permissions_test.go @@ -0,0 +1,208 @@ +package validate + +import ( + "context" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/bundle/permissions" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/databricks-sdk-go/apierr" + "github.com/databricks/databricks-sdk-go/experimental/mocks" + "github.com/databricks/databricks-sdk-go/service/workspace" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" +) + +func TestFolderPermissionsInheritedWhenRootPathDoesNotExist(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Workspace: config.Workspace{ + RootPath: "/Workspace/Users/foo@bar.com", + ArtifactPath: "/Workspace/Users/otherfoo@bar.com/artifacts", + FilePath: "/Workspace/Users/foo@bar.com/files", + StatePath: "/Workspace/Users/foo@bar.com/state", + ResourcePath: "/Workspace/Users/foo@bar.com/resources", + }, + Permissions: []resources.Permission{ + {Level: permissions.CAN_MANAGE, UserName: "foo@bar.com"}, + }, + }, + } + m := mocks.NewMockWorkspaceClient(t) + api := m.GetMockWorkspaceAPI() + api.EXPECT().GetStatusByPath(mock.Anything, "/Workspace/Users/otherfoo@bar.com/artifacts").Return(nil, &apierr.APIError{ + StatusCode: 404, + ErrorCode: "RESOURCE_DOES_NOT_EXIST", + }) + api.EXPECT().GetStatusByPath(mock.Anything, "/Workspace/Users/otherfoo@bar.com").Return(nil, &apierr.APIError{ + StatusCode: 404, + ErrorCode: "RESOURCE_DOES_NOT_EXIST", + }) + api.EXPECT().GetStatusByPath(mock.Anything, "/Workspace/Users/foo@bar.com").Return(nil, &apierr.APIError{ + StatusCode: 404, + ErrorCode: "RESOURCE_DOES_NOT_EXIST", + }) + api.EXPECT().GetStatusByPath(mock.Anything, "/Workspace/Users").Return(nil, &apierr.APIError{ + StatusCode: 404, + ErrorCode: "RESOURCE_DOES_NOT_EXIST", + }) + api.EXPECT().GetStatusByPath(mock.Anything, "/Workspace").Return(&workspace.ObjectInfo{ + ObjectId: 1234, + }, nil) + + api.EXPECT().GetPermissions(mock.Anything, workspace.GetWorkspaceObjectPermissionsRequest{ + WorkspaceObjectId: "1234", + WorkspaceObjectType: "directories", + }).Return(&workspace.WorkspaceObjectPermissions{ + ObjectId: "1234", + AccessControlList: []workspace.WorkspaceObjectAccessControlResponse{ + { + UserName: "foo@bar.com", + AllPermissions: []workspace.WorkspaceObjectPermission{ + {PermissionLevel: "CAN_MANAGE"}, + }, + }, + }, + }, nil) + + b.SetWorkpaceClient(m.WorkspaceClient) + rb := bundle.ReadOnly(b) + + diags := bundle.ApplyReadOnly(context.Background(), rb, ValidateFolderPermissions()) + require.Empty(t, diags) +} + +func TestValidateFolderPermissionsFailsOnMissingBundlePermission(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Workspace: config.Workspace{ + RootPath: "/Workspace/Users/foo@bar.com", + ArtifactPath: "/Workspace/Users/foo@bar.com/artifacts", + FilePath: "/Workspace/Users/foo@bar.com/files", + StatePath: "/Workspace/Users/foo@bar.com/state", + ResourcePath: "/Workspace/Users/foo@bar.com/resources", + }, + Permissions: []resources.Permission{ + {Level: permissions.CAN_MANAGE, UserName: "foo@bar.com"}, + }, + }, + } + m := mocks.NewMockWorkspaceClient(t) + api := m.GetMockWorkspaceAPI() + api.EXPECT().GetStatusByPath(mock.Anything, "/Workspace/Users/foo@bar.com").Return(&workspace.ObjectInfo{ + ObjectId: 1234, + }, nil) + + api.EXPECT().GetPermissions(mock.Anything, workspace.GetWorkspaceObjectPermissionsRequest{ + WorkspaceObjectId: "1234", + WorkspaceObjectType: "directories", + }).Return(&workspace.WorkspaceObjectPermissions{ + ObjectId: "1234", + AccessControlList: []workspace.WorkspaceObjectAccessControlResponse{ + { + UserName: "foo@bar.com", + AllPermissions: []workspace.WorkspaceObjectPermission{ + {PermissionLevel: "CAN_MANAGE"}, + }, + }, + { + UserName: "foo2@bar.com", + AllPermissions: []workspace.WorkspaceObjectPermission{ + {PermissionLevel: "CAN_MANAGE"}, + }, + }, + }, + }, nil) + + b.SetWorkpaceClient(m.WorkspaceClient) + rb := bundle.ReadOnly(b) + + diags := bundle.ApplyReadOnly(context.Background(), rb, ValidateFolderPermissions()) + require.Len(t, diags, 1) + require.Equal(t, "untracked permissions apply to target workspace path", diags[0].Summary) + require.Equal(t, diag.Warning, diags[0].Severity) + require.Equal(t, "The following permissions apply to the workspace folder at \"/Workspace/Users/foo@bar.com\" but are not configured in the bundle:\n- level: CAN_MANAGE, user_name: foo2@bar.com\n", diags[0].Detail) +} + +func TestValidateFolderPermissionsFailsOnPermissionMismatch(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Workspace: config.Workspace{ + RootPath: "/Workspace/Users/foo@bar.com", + ArtifactPath: "/Workspace/Users/foo@bar.com/artifacts", + FilePath: "/Workspace/Users/foo@bar.com/files", + StatePath: "/Workspace/Users/foo@bar.com/state", + ResourcePath: "/Workspace/Users/foo@bar.com/resources", + }, + Permissions: []resources.Permission{ + {Level: permissions.CAN_MANAGE, UserName: "foo@bar.com"}, + }, + }, + } + m := mocks.NewMockWorkspaceClient(t) + api := m.GetMockWorkspaceAPI() + api.EXPECT().GetStatusByPath(mock.Anything, "/Workspace/Users/foo@bar.com").Return(&workspace.ObjectInfo{ + ObjectId: 1234, + }, nil) + + api.EXPECT().GetPermissions(mock.Anything, workspace.GetWorkspaceObjectPermissionsRequest{ + WorkspaceObjectId: "1234", + WorkspaceObjectType: "directories", + }).Return(&workspace.WorkspaceObjectPermissions{ + ObjectId: "1234", + AccessControlList: []workspace.WorkspaceObjectAccessControlResponse{ + { + UserName: "foo2@bar.com", + AllPermissions: []workspace.WorkspaceObjectPermission{ + {PermissionLevel: "CAN_MANAGE"}, + }, + }, + }, + }, nil) + + b.SetWorkpaceClient(m.WorkspaceClient) + rb := bundle.ReadOnly(b) + + diags := bundle.ApplyReadOnly(context.Background(), rb, ValidateFolderPermissions()) + require.Len(t, diags, 1) + require.Equal(t, "untracked permissions apply to target workspace path", diags[0].Summary) + require.Equal(t, diag.Warning, diags[0].Severity) +} + +func TestValidateFolderPermissionsFailsOnNoRootFolder(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Workspace: config.Workspace{ + RootPath: "/NotExisting", + ArtifactPath: "/NotExisting/artifacts", + FilePath: "/NotExisting/files", + StatePath: "/NotExisting/state", + ResourcePath: "/NotExisting/resources", + }, + Permissions: []resources.Permission{ + {Level: permissions.CAN_MANAGE, UserName: "foo@bar.com"}, + }, + }, + } + m := mocks.NewMockWorkspaceClient(t) + api := m.GetMockWorkspaceAPI() + api.EXPECT().GetStatusByPath(mock.Anything, "/NotExisting").Return(nil, &apierr.APIError{ + StatusCode: 404, + ErrorCode: "RESOURCE_DOES_NOT_EXIST", + }) + api.EXPECT().GetStatusByPath(mock.Anything, "/").Return(nil, &apierr.APIError{ + StatusCode: 404, + ErrorCode: "RESOURCE_DOES_NOT_EXIST", + }) + + b.SetWorkpaceClient(m.WorkspaceClient) + rb := bundle.ReadOnly(b) + + diags := bundle.ApplyReadOnly(context.Background(), rb, ValidateFolderPermissions()) + require.Len(t, diags, 1) + require.Equal(t, "folder / and its parent folders do not exist", diags[0].Summary) + require.Equal(t, diag.Error, diags[0].Severity) +} diff --git a/bundle/config/validate/validate.go b/bundle/config/validate/validate.go index 79f42bd23..440477e65 100644 --- a/bundle/config/validate/validate.go +++ b/bundle/config/validate/validate.go @@ -35,6 +35,7 @@ func (v *validate) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics FilesToSync(), ValidateSyncPatterns(), JobTaskClusterSpec(), + ValidateFolderPermissions(), )) } diff --git a/bundle/libraries/workspace_path.go b/bundle/libraries/path.go similarity index 78% rename from bundle/libraries/workspace_path.go rename to bundle/libraries/path.go index 126ad3f13..3bad40fac 100644 --- a/bundle/libraries/workspace_path.go +++ b/bundle/libraries/path.go @@ -36,3 +36,12 @@ func IsWorkspaceLibrary(library *compute.Library) bool { return IsWorkspacePath(path) } + +// IsVolumesPath returns true if the specified path indicates that +func IsVolumesPath(path string) bool { + return strings.HasPrefix(path, "/Volumes/") +} + +func IsWorkspaceSharedPath(path string) bool { + return strings.HasPrefix(path, "/Workspace/Shared/") +} diff --git a/bundle/libraries/workspace_path_test.go b/bundle/libraries/path_test.go similarity index 77% rename from bundle/libraries/workspace_path_test.go rename to bundle/libraries/path_test.go index feaaab7f7..90fe187a2 100644 --- a/bundle/libraries/workspace_path_test.go +++ b/bundle/libraries/path_test.go @@ -31,3 +31,13 @@ func TestIsWorkspaceLibrary(t *testing.T) { // Empty. assert.False(t, IsWorkspaceLibrary(&compute.Library{})) } + +func TestIsVolumesPath(t *testing.T) { + // Absolute paths with particular prefixes. + assert.True(t, IsVolumesPath("/Volumes/path/to/package")) + + // Relative paths. + assert.False(t, IsVolumesPath("myfile.txt")) + assert.False(t, IsVolumesPath("./myfile.txt")) + assert.False(t, IsVolumesPath("../myfile.txt")) +} diff --git a/bundle/permissions/validate.go b/bundle/permissions/validate.go index acd2e6062..f1a18f430 100644 --- a/bundle/permissions/validate.go +++ b/bundle/permissions/validate.go @@ -3,9 +3,9 @@ package permissions import ( "context" "fmt" - "strings" "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/libraries" "github.com/databricks/cli/libs/diag" ) @@ -21,17 +21,13 @@ func (*validateSharedRootPermissions) Name() string { } func (*validateSharedRootPermissions) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - if isWorkspaceSharedRoot(b.Config.Workspace.RootPath) { + if libraries.IsWorkspaceSharedPath(b.Config.Workspace.RootPath) { return isUsersGroupPermissionSet(b) } return nil } -func isWorkspaceSharedRoot(path string) bool { - return strings.HasPrefix(path, "/Workspace/Shared/") -} - // isUsersGroupPermissionSet checks that top-level permissions set for bundle contain group_name: users with CAN_MANAGE permission. func isUsersGroupPermissionSet(b *bundle.Bundle) diag.Diagnostics { var diags diag.Diagnostics diff --git a/bundle/permissions/workspace_path_permissions.go b/bundle/permissions/workspace_path_permissions.go new file mode 100644 index 000000000..a3b4424c1 --- /dev/null +++ b/bundle/permissions/workspace_path_permissions.go @@ -0,0 +1,89 @@ +package permissions + +import ( + "fmt" + "strings" + + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/databricks-sdk-go/service/workspace" +) + +type WorkspacePathPermissions struct { + Path string + Permissions []resources.Permission +} + +func ObjectAclToResourcePermissions(path string, acl []workspace.WorkspaceObjectAccessControlResponse) *WorkspacePathPermissions { + permissions := make([]resources.Permission, 0) + for _, a := range acl { + // Skip the admin group because it's added to all resources by default. + if a.GroupName == "admins" { + continue + } + + for _, pl := range a.AllPermissions { + permissions = append(permissions, resources.Permission{ + Level: convertWorkspaceObjectPermissionLevel(pl.PermissionLevel), + GroupName: a.GroupName, + UserName: a.UserName, + ServicePrincipalName: a.ServicePrincipalName, + }) + } + } + + return &WorkspacePathPermissions{Permissions: permissions, Path: path} +} + +func (p WorkspacePathPermissions) Compare(perms []resources.Permission) diag.Diagnostics { + var diags diag.Diagnostics + + // Check the permissions in the workspace and see if they are all set in the bundle. + ok, missing := containsAll(p.Permissions, perms) + if !ok { + diags = diags.Append(diag.Diagnostic{ + Severity: diag.Warning, + Summary: "untracked permissions apply to target workspace path", + Detail: fmt.Sprintf("The following permissions apply to the workspace folder at %q but are not configured in the bundle:\n%s", p.Path, toString(missing)), + }) + } + + return diags +} + +// containsAll checks if permA contains all permissions in permB. +func containsAll(permA []resources.Permission, permB []resources.Permission) (bool, []resources.Permission) { + missing := make([]resources.Permission, 0) + for _, a := range permA { + found := false + for _, b := range permB { + if a == b { + found = true + break + } + } + if !found { + missing = append(missing, a) + } + } + return len(missing) == 0, missing +} + +// convertWorkspaceObjectPermissionLevel converts matching object permission levels to bundle ones. +// If there is no matching permission level, it returns permission level as is, for example, CAN_EDIT. +func convertWorkspaceObjectPermissionLevel(level workspace.WorkspaceObjectPermissionLevel) string { + switch level { + case workspace.WorkspaceObjectPermissionLevelCanRead: + return CAN_VIEW + default: + return string(level) + } +} + +func toString(p []resources.Permission) string { + var sb strings.Builder + for _, perm := range p { + sb.WriteString(fmt.Sprintf("- %s\n", perm.String())) + } + return sb.String() +} diff --git a/bundle/permissions/workspace_path_permissions_test.go b/bundle/permissions/workspace_path_permissions_test.go new file mode 100644 index 000000000..0bb00474c --- /dev/null +++ b/bundle/permissions/workspace_path_permissions_test.go @@ -0,0 +1,121 @@ +package permissions + +import ( + "testing" + + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/databricks-sdk-go/service/workspace" + "github.com/stretchr/testify/require" +) + +func TestWorkspacePathPermissionsCompare(t *testing.T) { + testCases := []struct { + perms []resources.Permission + acl []workspace.WorkspaceObjectAccessControlResponse + expected diag.Diagnostics + }{ + { + perms: []resources.Permission{ + {Level: CAN_MANAGE, UserName: "foo@bar.com"}, + }, + acl: []workspace.WorkspaceObjectAccessControlResponse{ + { + UserName: "foo@bar.com", + AllPermissions: []workspace.WorkspaceObjectPermission{ + {PermissionLevel: "CAN_MANAGE"}, + }, + }, + }, + expected: nil, + }, + { + perms: []resources.Permission{ + {Level: CAN_MANAGE, UserName: "foo@bar.com"}, + }, + acl: []workspace.WorkspaceObjectAccessControlResponse{ + { + UserName: "foo@bar.com", + AllPermissions: []workspace.WorkspaceObjectPermission{ + {PermissionLevel: "CAN_MANAGE"}, + }, + }, + { + GroupName: "admins", + AllPermissions: []workspace.WorkspaceObjectPermission{ + {PermissionLevel: "CAN_MANAGE"}, + }, + }, + }, + expected: nil, + }, + { + perms: []resources.Permission{ + {Level: CAN_VIEW, UserName: "foo@bar.com"}, + {Level: CAN_MANAGE, ServicePrincipalName: "sp.com"}, + }, + acl: []workspace.WorkspaceObjectAccessControlResponse{ + { + UserName: "foo@bar.com", + AllPermissions: []workspace.WorkspaceObjectPermission{ + {PermissionLevel: "CAN_READ"}, + }, + }, + }, + expected: nil, + }, + { + perms: []resources.Permission{ + {Level: CAN_MANAGE, UserName: "foo@bar.com"}, + }, + acl: []workspace.WorkspaceObjectAccessControlResponse{ + { + UserName: "foo@bar.com", + AllPermissions: []workspace.WorkspaceObjectPermission{ + {PermissionLevel: "CAN_MANAGE"}, + }, + }, + { + GroupName: "foo", + AllPermissions: []workspace.WorkspaceObjectPermission{ + {PermissionLevel: "CAN_MANAGE"}, + }, + }, + }, + expected: diag.Diagnostics{ + { + Severity: diag.Warning, + Summary: "untracked permissions apply to target workspace path", + Detail: "The following permissions apply to the workspace folder at \"path\" but are not configured in the bundle:\n- level: CAN_MANAGE, group_name: foo\n", + }, + }, + }, + { + perms: []resources.Permission{ + {Level: CAN_MANAGE, UserName: "foo@bar.com"}, + }, + acl: []workspace.WorkspaceObjectAccessControlResponse{ + { + UserName: "foo2@bar.com", + AllPermissions: []workspace.WorkspaceObjectPermission{ + {PermissionLevel: "CAN_MANAGE"}, + }, + }, + }, + expected: diag.Diagnostics{ + { + Severity: diag.Warning, + Summary: "untracked permissions apply to target workspace path", + Detail: "The following permissions apply to the workspace folder at \"path\" but are not configured in the bundle:\n- level: CAN_MANAGE, user_name: foo2@bar.com\n", + }, + }, + }, + } + + for _, tc := range testCases { + wp := ObjectAclToResourcePermissions("path", tc.acl) + diags := wp.Compare(tc.perms) + require.Equal(t, tc.expected, diags) + } + +} diff --git a/bundle/permissions/workspace_root.go b/bundle/permissions/workspace_root.go index e7867521e..93a90ed9c 100644 --- a/bundle/permissions/workspace_root.go +++ b/bundle/permissions/workspace_root.go @@ -34,7 +34,7 @@ func giveAccessForWorkspaceRoot(ctx context.Context, b *bundle.Bundle) error { permissions := make([]workspace.WorkspaceObjectAccessControlRequest, 0) for _, p := range b.Config.Permissions { - level, err := getWorkspaceObjectPermissionLevel(p.Level) + level, err := GetWorkspaceObjectPermissionLevel(p.Level) if err != nil { return err } @@ -65,7 +65,7 @@ func giveAccessForWorkspaceRoot(ctx context.Context, b *bundle.Bundle) error { return err } -func getWorkspaceObjectPermissionLevel(bundlePermission string) (workspace.WorkspaceObjectPermissionLevel, error) { +func GetWorkspaceObjectPermissionLevel(bundlePermission string) (workspace.WorkspaceObjectPermissionLevel, error) { switch bundlePermission { case CAN_MANAGE: return workspace.WorkspaceObjectPermissionLevelCanManage, nil From ed84a33b0a70312c04f14908225bb9c119bb84f5 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Thu, 24 Oct 2024 15:24:30 +0200 Subject: [PATCH 13/14] Reuse resource resolution code for the run command (#1858) ## Changes As of #1846 we have a generalized package for doing resource lookups and completion. This change updates the run command to use this instead of more specific code under `bundle/run`. ## Tests * Unit tests pass * Manually confirmed that completion and prompting works --- bundle/resources/completion.go | 4 +- bundle/resources/completion_test.go | 26 ++++++++++ bundle/resources/lookup.go | 43 +++++++++++++--- bundle/resources/lookup_test.go | 29 +++++++++++ bundle/run/keys.go | 69 ------------------------- bundle/run/keys_test.go | 25 --------- bundle/run/runner.go | 51 ++++++++---------- bundle/run/runner_test.go | 80 +++-------------------------- cmd/bundle/run.go | 74 +++++++++++++++++++------- 9 files changed, 175 insertions(+), 226 deletions(-) delete mode 100644 bundle/run/keys.go delete mode 100644 bundle/run/keys_test.go diff --git a/bundle/resources/completion.go b/bundle/resources/completion.go index 3ce0510a9..c1bcd022f 100644 --- a/bundle/resources/completion.go +++ b/bundle/resources/completion.go @@ -4,9 +4,9 @@ import "github.com/databricks/cli/bundle" // Completions returns the same as [References] except // that every key maps directly to a single reference. -func Completions(b *bundle.Bundle) map[string]Reference { +func Completions(b *bundle.Bundle, filters ...Filter) map[string]Reference { out := make(map[string]Reference) - keyOnlyRefs, _ := References(b) + keyOnlyRefs, _ := References(b, filters...) for k, refs := range keyOnlyRefs { if len(refs) != 1 { continue diff --git a/bundle/resources/completion_test.go b/bundle/resources/completion_test.go index 36ad1a06a..2f7942aae 100644 --- a/bundle/resources/completion_test.go +++ b/bundle/resources/completion_test.go @@ -30,3 +30,29 @@ func TestCompletions_SkipDuplicates(t *testing.T) { assert.Contains(t, out, "bar") } } + +func TestCompletions_Filter(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "foo": {}, + }, + Pipelines: map[string]*resources.Pipeline{ + "bar": {}, + }, + }, + }, + } + + includeJobs := func(ref Reference) bool { + _, ok := ref.Resource.(*resources.Job) + return ok + } + + // Test that this does not include the pipeline. + out := Completions(b, includeJobs) + if assert.Len(t, out, 1) { + assert.Contains(t, out, "foo") + } +} diff --git a/bundle/resources/lookup.go b/bundle/resources/lookup.go index 74aec531a..48d862230 100644 --- a/bundle/resources/lookup.go +++ b/bundle/resources/lookup.go @@ -10,13 +10,36 @@ import ( // Reference is a reference to a resource. // It includes the resource type description, and a reference to the resource itself. type Reference struct { + // Key is the unique key of the resource, e.g. "my_job". + Key string + + // KeyWithType is the unique key of the resource, including the resource type, e.g. "jobs.my_job". + KeyWithType string + + // Description is the resource type description. Description config.ResourceDescription - Resource config.ConfigResource + + // Resource is the resource itself. + Resource config.ConfigResource } // Map is the core type for resource lookup and completion. type Map map[string][]Reference +// Filter defines the function signature for filtering resources. +type Filter func(Reference) bool + +// includeReference checks if the specified reference passes all filters. +// If the list of filters is empty, the reference is always included. +func includeReference(filters []Filter, ref Reference) bool { + for _, filter := range filters { + if !filter(ref) { + return false + } + } + return true +} + // References returns maps of resource keys to a slice of [Reference]. // // The first map is indexed by the resource key only. @@ -24,7 +47,7 @@ type Map map[string][]Reference // // While the return types allows for multiple resources to share the same key, // this is confirmed not to happen in the [validate.UniqueResourceKeys] mutator. -func References(b *bundle.Bundle) (Map, Map) { +func References(b *bundle.Bundle, filters ...Filter) (Map, Map) { keyOnly := make(Map) keyWithType := make(Map) @@ -32,13 +55,19 @@ func References(b *bundle.Bundle) (Map, Map) { for _, group := range b.Config.Resources.AllResources() { for k, v := range group.Resources { ref := Reference{ + Key: k, + KeyWithType: fmt.Sprintf("%s.%s", group.Description.PluralName, k), Description: group.Description, Resource: v, } - kt := fmt.Sprintf("%s.%s", group.Description.PluralName, k) - keyOnly[k] = append(keyOnly[k], ref) - keyWithType[kt] = append(keyWithType[kt], ref) + // Skip resources that do not pass all filters. + if !includeReference(filters, ref) { + continue + } + + keyOnly[ref.Key] = append(keyOnly[ref.Key], ref) + keyWithType[ref.KeyWithType] = append(keyWithType[ref.KeyWithType], ref) } } @@ -48,8 +77,8 @@ func References(b *bundle.Bundle) (Map, Map) { // Lookup returns the resource with the specified key. // If the key maps to more than one resource, an error is returned. // If the key does not map to any resource, an error is returned. -func Lookup(b *bundle.Bundle, key string) (Reference, error) { - keyOnlyRefs, keyWithTypeRefs := References(b) +func Lookup(b *bundle.Bundle, key string, filters ...Filter) (Reference, error) { + keyOnlyRefs, keyWithTypeRefs := References(b, filters...) refs, ok := keyOnlyRefs[key] if !ok { refs, ok = keyWithTypeRefs[key] diff --git a/bundle/resources/lookup_test.go b/bundle/resources/lookup_test.go index d2092c23d..b2eaafd1a 100644 --- a/bundle/resources/lookup_test.go +++ b/bundle/resources/lookup_test.go @@ -86,3 +86,32 @@ func TestLookup_Nominal(t *testing.T) { assert.Equal(t, "Foo job", out.Resource.GetName()) } } + +func TestLookup_NominalWithFilters(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "foo": {}, + }, + Pipelines: map[string]*resources.Pipeline{ + "bar": {}, + }, + }, + }, + } + + includeJobs := func(ref Reference) bool { + _, ok := ref.Resource.(*resources.Job) + return ok + } + + // This should succeed because the filter includes jobs. + _, err := Lookup(b, "foo", includeJobs) + require.NoError(t, err) + + // This should fail because the filter excludes pipelines. + _, err = Lookup(b, "bar", includeJobs) + require.Error(t, err) + assert.ErrorContains(t, err, `resource with key "bar" not found`) +} diff --git a/bundle/run/keys.go b/bundle/run/keys.go deleted file mode 100644 index 76ec50ac8..000000000 --- a/bundle/run/keys.go +++ /dev/null @@ -1,69 +0,0 @@ -package run - -import ( - "fmt" - - "github.com/databricks/cli/bundle" - "golang.org/x/exp/maps" -) - -// RunnerLookup maps identifiers to a list of workloads that match that identifier. -// The list can have more than 1 entry if resources of different types use the -// same key. When this happens, the user should disambiguate between them. -type RunnerLookup map[string][]Runner - -// ResourceKeys computes a map with -func ResourceKeys(b *bundle.Bundle) (keyOnly RunnerLookup, keyWithType RunnerLookup) { - keyOnly = make(RunnerLookup) - keyWithType = make(RunnerLookup) - - r := b.Config.Resources - for k, v := range r.Jobs { - kt := fmt.Sprintf("jobs.%s", k) - w := jobRunner{key: key(kt), bundle: b, job: v} - keyOnly[k] = append(keyOnly[k], &w) - keyWithType[kt] = append(keyWithType[kt], &w) - } - for k, v := range r.Pipelines { - kt := fmt.Sprintf("pipelines.%s", k) - w := pipelineRunner{key: key(kt), bundle: b, pipeline: v} - keyOnly[k] = append(keyOnly[k], &w) - keyWithType[kt] = append(keyWithType[kt], &w) - } - return -} - -// ResourceCompletionMap returns a map of resource keys to their respective names. -func ResourceCompletionMap(b *bundle.Bundle) map[string]string { - out := make(map[string]string) - keyOnly, keyWithType := ResourceKeys(b) - - // Keep track of resources we have seen by their fully qualified key. - seen := make(map[string]bool) - - // First add resources that can be identified by key alone. - for k, v := range keyOnly { - // Invariant: len(v) >= 1. See [ResourceKeys]. - if len(v) == 1 { - seen[v[0].Key()] = true - out[k] = v[0].Name() - } - } - - // Then add resources that can only be identified by their type and key. - for k, v := range keyWithType { - // Invariant: len(v) == 1. See [ResourceKeys]. - _, ok := seen[v[0].Key()] - if ok { - continue - } - out[k] = v[0].Name() - } - - return out -} - -// ResourceCompletions returns a list of keys that unambiguously reference resources in the bundle. -func ResourceCompletions(b *bundle.Bundle) []string { - return maps.Keys(ResourceCompletionMap(b)) -} diff --git a/bundle/run/keys_test.go b/bundle/run/keys_test.go deleted file mode 100644 index 5ab73b13d..000000000 --- a/bundle/run/keys_test.go +++ /dev/null @@ -1,25 +0,0 @@ -package run - -import ( - "testing" - - "github.com/databricks/cli/bundle" - "github.com/databricks/cli/bundle/config" - "github.com/databricks/cli/bundle/config/resources" - "github.com/stretchr/testify/assert" -) - -func TestResourceCompletionsUnique(t *testing.T) { - b := &bundle.Bundle{ - Config: config.Root{ - Resources: config.Resources{ - Jobs: map[string]*resources.Job{ - "foo": {}, - "bar": {}, - }, - }, - }, - } - - assert.ElementsMatch(t, []string{"foo", "bar"}, ResourceCompletions(b)) -} diff --git a/bundle/run/runner.go b/bundle/run/runner.go index 1cdcc9d8b..4c907d068 100644 --- a/bundle/run/runner.go +++ b/bundle/run/runner.go @@ -3,9 +3,10 @@ package run import ( "context" "fmt" - "strings" "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config/resources" + refs "github.com/databricks/cli/bundle/resources" "github.com/databricks/cli/bundle/run/output" ) @@ -38,34 +39,24 @@ type Runner interface { argsHandler } -// Find locates a runner matching the specified argument. -// -// Its behavior is as follows: -// 1. Try to find a resource with identical to the argument. -// 2. Try to find a resource with . identical to the argument. -// -// If an argument resolves to multiple resources, it returns an error. -func Find(b *bundle.Bundle, arg string) (Runner, error) { - keyOnly, keyWithType := ResourceKeys(b) - if len(keyWithType) == 0 { - return nil, fmt.Errorf("bundle defines no resources") +// IsRunnable returns a filter that only allows runnable resources. +func IsRunnable(ref refs.Reference) bool { + switch ref.Resource.(type) { + case *resources.Job, *resources.Pipeline: + return true + default: + return false + } +} + +// ToRunner converts a resource reference to a runnable resource. +func ToRunner(b *bundle.Bundle, ref refs.Reference) (Runner, error) { + switch resource := ref.Resource.(type) { + case *resources.Job: + return &jobRunner{key: key(ref.KeyWithType), bundle: b, job: resource}, nil + case *resources.Pipeline: + return &pipelineRunner{key: key(ref.KeyWithType), bundle: b, pipeline: resource}, nil + default: + return nil, fmt.Errorf("unsupported resource type: %T", resource) } - - runners, ok := keyOnly[arg] - if !ok { - runners, ok = keyWithType[arg] - if !ok { - return nil, fmt.Errorf("no such resource: %s", arg) - } - } - - if len(runners) != 1 { - var keys []string - for _, runner := range runners { - keys = append(keys, runner.Key()) - } - return nil, fmt.Errorf("ambiguous: %s (can resolve to all of %s)", arg, strings.Join(keys, ", ")) - } - - return runners[0], nil } diff --git a/bundle/run/runner_test.go b/bundle/run/runner_test.go index 85baa192f..2fc5fa6fa 100644 --- a/bundle/run/runner_test.go +++ b/bundle/run/runner_test.go @@ -3,82 +3,14 @@ package run import ( "testing" - "github.com/databricks/cli/bundle" - "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/resources" + refs "github.com/databricks/cli/bundle/resources" "github.com/stretchr/testify/assert" ) -func TestFindNoResources(t *testing.T) { - b := &bundle.Bundle{ - Config: config.Root{ - Resources: config.Resources{}, - }, - } - - _, err := Find(b, "foo") - assert.ErrorContains(t, err, "bundle defines no resources") -} - -func TestFindSingleArg(t *testing.T) { - b := &bundle.Bundle{ - Config: config.Root{ - Resources: config.Resources{ - Jobs: map[string]*resources.Job{ - "foo": {}, - }, - }, - }, - } - - _, err := Find(b, "foo") - assert.NoError(t, err) -} - -func TestFindSingleArgNotFound(t *testing.T) { - b := &bundle.Bundle{ - Config: config.Root{ - Resources: config.Resources{ - Jobs: map[string]*resources.Job{ - "foo": {}, - }, - }, - }, - } - - _, err := Find(b, "bar") - assert.ErrorContains(t, err, "no such resource: bar") -} - -func TestFindSingleArgAmbiguous(t *testing.T) { - b := &bundle.Bundle{ - Config: config.Root{ - Resources: config.Resources{ - Jobs: map[string]*resources.Job{ - "key": {}, - }, - Pipelines: map[string]*resources.Pipeline{ - "key": {}, - }, - }, - }, - } - - _, err := Find(b, "key") - assert.ErrorContains(t, err, "ambiguous: ") -} - -func TestFindSingleArgWithType(t *testing.T) { - b := &bundle.Bundle{ - Config: config.Root{ - Resources: config.Resources{ - Jobs: map[string]*resources.Job{ - "key": {}, - }, - }, - }, - } - - _, err := Find(b, "jobs.key") - assert.NoError(t, err) +func TestRunner_IsRunnable(t *testing.T) { + assert.True(t, IsRunnable(refs.Reference{Resource: &resources.Job{}})) + assert.True(t, IsRunnable(refs.Reference{Resource: &resources.Pipeline{}})) + assert.False(t, IsRunnable(refs.Reference{Resource: &resources.MlflowModel{}})) + assert.False(t, IsRunnable(refs.Reference{Resource: &resources.MlflowExperiment{}})) } diff --git a/cmd/bundle/run.go b/cmd/bundle/run.go index ed5bd2ef1..96851d0c0 100644 --- a/cmd/bundle/run.go +++ b/cmd/bundle/run.go @@ -1,12 +1,14 @@ package bundle import ( + "context" "encoding/json" "fmt" "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/deploy/terraform" "github.com/databricks/cli/bundle/phases" + "github.com/databricks/cli/bundle/resources" "github.com/databricks/cli/bundle/run" "github.com/databricks/cli/bundle/run/output" "github.com/databricks/cli/cmd/bundle/utils" @@ -14,8 +16,54 @@ import ( "github.com/databricks/cli/libs/cmdio" "github.com/databricks/cli/libs/flags" "github.com/spf13/cobra" + "golang.org/x/exp/maps" ) +func promptRunArgument(ctx context.Context, b *bundle.Bundle) (string, error) { + // Compute map of "Human readable name of resource" -> "resource key". + inv := make(map[string]string) + for k, ref := range resources.Completions(b, run.IsRunnable) { + title := fmt.Sprintf("%s: %s", ref.Description.SingularTitle, ref.Resource.GetName()) + inv[title] = k + } + + key, err := cmdio.Select(ctx, inv, "Resource to run") + if err != nil { + return "", err + } + + return key, nil +} + +func resolveRunArgument(ctx context.Context, b *bundle.Bundle, args []string) (string, error) { + // If no arguments are specified, prompt the user to select something to run. + if len(args) == 0 && cmdio.IsPromptSupported(ctx) { + return promptRunArgument(ctx, b) + } + + if len(args) < 1 { + return "", fmt.Errorf("expected a KEY of the resource to run") + } + + return args[0], nil +} + +func keyToRunner(b *bundle.Bundle, arg string) (run.Runner, error) { + // Locate the resource to run. + ref, err := resources.Lookup(b, arg, run.IsRunnable) + if err != nil { + return nil, err + } + + // Convert the resource to a runnable resource. + runner, err := run.ToRunner(b, ref) + if err != nil { + return nil, err + } + + return runner, nil +} + func newRunCommand() *cobra.Command { cmd := &cobra.Command{ Use: "run [flags] KEY", @@ -61,22 +109,9 @@ task or a Python wheel task, the second example applies. return err } - // If no arguments are specified, prompt the user to select something to run. - if len(args) == 0 && cmdio.IsPromptSupported(ctx) { - // Invert completions from KEY -> NAME, to NAME -> KEY. - inv := make(map[string]string) - for k, v := range run.ResourceCompletionMap(b) { - inv[v] = k - } - id, err := cmdio.Select(ctx, inv, "Resource to run") - if err != nil { - return err - } - args = append(args, id) - } - - if len(args) < 1 { - return fmt.Errorf("expected a KEY of the resource to run") + arg, err := resolveRunArgument(ctx, b, args) + if err != nil { + return err } diags = bundle.Apply(ctx, b, bundle.Seq( @@ -89,7 +124,7 @@ task or a Python wheel task, the second example applies. return err } - runner, err := run.Find(b, args[0]) + runner, err := keyToRunner(b, arg) if err != nil { return err } @@ -146,10 +181,11 @@ task or a Python wheel task, the second example applies. } if len(args) == 0 { - return run.ResourceCompletions(b), cobra.ShellCompDirectiveNoFileComp + completions := resources.Completions(b, run.IsRunnable) + return maps.Keys(completions), cobra.ShellCompDirectiveNoFileComp } else { // If we know the resource to run, we can complete additional positional arguments. - runner, err := run.Find(b, args[0]) + runner, err := keyToRunner(b, args[0]) if err != nil { return nil, cobra.ShellCompDirectiveError } From 5a555de5032a6823861e03fdd3434200c3ecfc69 Mon Sep 17 00:00:00 2001 From: hectorcast-db Date: Fri, 25 Oct 2024 11:15:24 +0200 Subject: [PATCH 14/14] [Internal] Automatically trigger integration tests on PR (#1857) ## Changes Automatically trigger integration tests when a PR is opened or updated ## Tests Workflow below. --------- Co-authored-by: Pieter Noordhuis --- .github/workflows/integration-tests.yml | 60 +++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 .github/workflows/integration-tests.yml diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml new file mode 100644 index 000000000..a40cdf32c --- /dev/null +++ b/.github/workflows/integration-tests.yml @@ -0,0 +1,60 @@ +name: integration + +on: + + pull_request: + types: [opened, synchronize] + + merge_group: + + +jobs: + trigger-tests: + if: github.event_name == 'pull_request' + runs-on: ubuntu-latest + environment: "test-trigger-is" + + steps: + - uses: actions/checkout@v4 + + - name: Generate GitHub App Token + id: generate-token + uses: actions/create-github-app-token@v1 + with: + app-id: ${{ secrets.DECO_WORKFLOW_TRIGGER_APP_ID }} + private-key: ${{ secrets.DECO_WORKFLOW_TRIGGER_PRIVATE_KEY }} + owner: ${{ secrets.ORG_NAME }} + repositories: ${{secrets.REPO_NAME}} + + - name: Trigger Workflow in Another Repo + env: + GH_TOKEN: ${{ steps.generate-token.outputs.token }} + run: | + gh workflow run cli-isolated-pr.yml -R ${{ secrets.ORG_NAME }}/${{secrets.REPO_NAME}} \ + --ref main \ + -f pull_request_number=${{ github.event.pull_request.number }} \ + -f commit_sha=${{ github.event.pull_request.head.sha }} + + + + # Statuses and checks apply to specific commits (by hash). + # Enforcement of required checks is done both at the PR level and the merge queue level. + # In case of multiple commits in a single PR, the hash of the squashed commit + # will not match the one for the latest (approved) commit in the PR. + # We auto approve the check for the merge queue for two reasons: + # * Queue times out due to duration of tests. + # * Avoid running integration tests twice, since it was already run at the tip of the branch before squashing. + auto-approve: + if: github.event_name == 'merge_group' + runs-on: ubuntu-latest + steps: + - name: Mark Check + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + shell: bash + run: | + gh api -X POST -H "Accept: application/vnd.github+json" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + /repos/${{ github.repository }}/statuses/${{ github.sha }} \ + -f 'state=success' \ + -f 'context=Integration Tests Check'