From db6313e99c55f8aed8de6e0df7454c1ab7cc3293 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Wed, 5 Jul 2023 17:30:54 +0200 Subject: [PATCH 01/18] Fix secrets put-secret command (#545) ## Changes Two issues with this command: * The command line arguments for the secret value were ignored * If the secret value was piped through stdin, it would still prompt The second issue prevented users from using multi-line strings because the prompt reads until end-of-line. This change adds testing infrastructure for: * Setting up a workspace focused test (common between many tests) * Running a snippet of Python through the command execution API Porting more integration tests to use this infrastructure will be done in later commits. ## Tests New integration test passes. The interactive path cannot be integration tested just yet. --- cmd/workspace/secrets/overrides.go | 59 +++++++++++++++----- internal/acc/debug.go | 42 +++++++++++++++ internal/acc/helpers.go | 35 ++++++++++++ internal/acc/workspace.go | 68 +++++++++++++++++++++++ internal/secrets_test.go | 86 ++++++++++++++++++++++++++++++ libs/cmdio/io.go | 11 ++-- 6 files changed, 283 insertions(+), 18 deletions(-) create mode 100644 internal/acc/debug.go create mode 100644 internal/acc/helpers.go create mode 100644 internal/acc/workspace.go diff --git a/cmd/workspace/secrets/overrides.go b/cmd/workspace/secrets/overrides.go index d46284bf6..5443aca28 100644 --- a/cmd/workspace/secrets/overrides.go +++ b/cmd/workspace/secrets/overrides.go @@ -1,6 +1,11 @@ package secrets import ( + "encoding/base64" + "fmt" + "io" + "os" + "github.com/databricks/cli/cmd/root" "github.com/databricks/cli/libs/cmdio" "github.com/databricks/cli/libs/flags" @@ -40,15 +45,14 @@ var putSecretCmd = &cobra.Command{ and cannot exceed 128 characters. The maximum allowed secret value size is 128 KB. The maximum number of secrets in a given scope is 1000. - The input fields "string_value" or "bytes_value" specify the type of the - secret, which will determine the value returned when the secret value is - requested. Exactly one must be specified. + The arguments "string-value" or "bytes-value" specify the type of the secret, + which will determine the value returned when the secret value is requested. - Throws RESOURCE_DOES_NOT_EXIST if no such secret scope exists. Throws - RESOURCE_LIMIT_EXCEEDED if maximum number of secrets in scope is exceeded. - Throws INVALID_PARAMETER_VALUE if the key name or value length is invalid. - Throws PERMISSION_DENIED if the user does not have permission to make this - API call.`, + You can specify the secret value in one of three ways: + * Specify the value as a string using the --string-value flag. + * Input the secret when prompted interactively (single-line secrets). + * Pass the secret via standard input (multi-line secrets). 
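+
+  For example, to store a multi-line secret read from a local file via
+  standard input (SCOPE, KEY, and the file name are placeholders):
+
+    databricks secrets put-secret SCOPE KEY < secret.txt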
+ `, Annotations: map[string]string{}, Args: func(cmd *cobra.Command, args []string) error { @@ -62,6 +66,13 @@ var putSecretCmd = &cobra.Command{ RunE: func(cmd *cobra.Command, args []string) (err error) { ctx := cmd.Context() w := root.WorkspaceClient(ctx) + + bytesValueChanged := cmd.Flags().Changed("bytes-value") + stringValueChanged := cmd.Flags().Changed("string-value") + if bytesValueChanged && stringValueChanged { + return fmt.Errorf("cannot specify both --bytes-value and --string-value") + } + if cmd.Flags().Changed("json") { err = putSecretJson.Unmarshal(&putSecretReq) if err != nil { @@ -71,12 +82,20 @@ var putSecretCmd = &cobra.Command{ putSecretReq.Scope = args[0] putSecretReq.Key = args[1] - value, err := cmdio.Secret(ctx) - if err != nil { - return err + switch { + case bytesValueChanged: + // Bytes value set; encode as base64. + putSecretReq.BytesValue = base64.StdEncoding.EncodeToString([]byte(putSecretReq.BytesValue)) + case stringValueChanged: + // String value set; nothing to do. + default: + // Neither is specified; read secret value from stdin. + bytes, err := promptSecret(cmd) + if err != nil { + return err + } + putSecretReq.BytesValue = base64.StdEncoding.EncodeToString(bytes) } - - putSecretReq.StringValue = value } err = w.Secrets.PutSecret(ctx, putSecretReq) @@ -86,3 +105,17 @@ var putSecretCmd = &cobra.Command{ return nil }, } + +func promptSecret(cmd *cobra.Command) ([]byte, error) { + // If stdin is a TTY, prompt for the secret. + if !cmdio.IsInTTY(cmd.Context()) { + return io.ReadAll(os.Stdin) + } + + value, err := cmdio.Secret(cmd.Context(), "Please enter your secret value") + if err != nil { + return nil, err + } + + return []byte(value), nil +} diff --git a/internal/acc/debug.go b/internal/acc/debug.go new file mode 100644 index 000000000..467642e24 --- /dev/null +++ b/internal/acc/debug.go @@ -0,0 +1,42 @@ +package acc + +import ( + "encoding/json" + "os" + "path" + "path/filepath" + "testing" +) + +// Detects if test is run from "debug test" feature in VS Code. +func isInDebug() bool { + ex, _ := os.Executable() + return path.Base(ex) == "__debug_bin" +} + +// Loads debug environment from ~/.databricks/debug-env.json. +func loadDebugEnvIfRunFromIDE(t *testing.T, key string) { + if !isInDebug() { + return + } + home, err := os.UserHomeDir() + if err != nil { + t.Fatalf("cannot find user home: %s", err) + } + raw, err := os.ReadFile(filepath.Join(home, ".databricks/debug-env.json")) + if err != nil { + t.Fatalf("cannot load ~/.databricks/debug-env.json: %s", err) + } + var conf map[string]map[string]string + err = json.Unmarshal(raw, &conf) + if err != nil { + t.Fatalf("cannot parse ~/.databricks/debug-env.json: %s", err) + } + vars, ok := conf[key] + if !ok { + t.Fatalf("~/.databricks/debug-env.json#%s not configured", key) + } + for k, v := range vars { + os.Setenv(k, v) + } +} diff --git a/internal/acc/helpers.go b/internal/acc/helpers.go new file mode 100644 index 000000000..aa9902745 --- /dev/null +++ b/internal/acc/helpers.go @@ -0,0 +1,35 @@ +package acc + +import ( + "fmt" + "math/rand" + "os" + "strings" + "testing" + "time" +) + +// GetEnvOrSkipTest proceeds with test only with that env variable. +func GetEnvOrSkipTest(t *testing.T, name string) string { + value := os.Getenv(name) + if value == "" { + t.Skipf("Environment variable %s is missing", name) + } + return value +} + +const charset = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" + +// RandomName gives random name with optional prefix. e.g. 
qa.RandomName("tf-") +func RandomName(prefix ...string) string { + rand.Seed(time.Now().UnixNano()) + randLen := 12 + b := make([]byte, randLen) + for i := range b { + b[i] = charset[rand.Intn(randLen)] + } + if len(prefix) > 0 { + return fmt.Sprintf("%s%s", strings.Join(prefix, ""), b) + } + return string(b) +} diff --git a/internal/acc/workspace.go b/internal/acc/workspace.go new file mode 100644 index 000000000..8944e199f --- /dev/null +++ b/internal/acc/workspace.go @@ -0,0 +1,68 @@ +package acc + +import ( + "context" + "testing" + + "github.com/databricks/databricks-sdk-go" + "github.com/databricks/databricks-sdk-go/service/compute" + "github.com/stretchr/testify/require" +) + +type WorkspaceT struct { + *testing.T + + W *databricks.WorkspaceClient + + ctx context.Context + + exec *compute.CommandExecutorV2 +} + +func WorkspaceTest(t *testing.T) (context.Context, *WorkspaceT) { + loadDebugEnvIfRunFromIDE(t, "workspace") + + t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) + + w, err := databricks.NewWorkspaceClient() + require.NoError(t, err) + + wt := &WorkspaceT{ + T: t, + + W: w, + + ctx: context.Background(), + } + + return wt.ctx, wt +} + +func (t *WorkspaceT) TestClusterID() string { + clusterID := GetEnvOrSkipTest(t.T, "TEST_BRICKS_CLUSTER_ID") + err := t.W.Clusters.EnsureClusterIsRunning(t.ctx, clusterID) + require.NoError(t, err) + return clusterID +} + +func (t *WorkspaceT) RunPython(code string) (string, error) { + var err error + + // Create command executor only once per test. + if t.exec == nil { + t.exec, err = t.W.CommandExecution.Start(t.ctx, t.TestClusterID(), compute.LanguagePython) + require.NoError(t, err) + + t.Cleanup(func() { + err := t.exec.Destroy(t.ctx) + require.NoError(t, err) + }) + } + + results, err := t.exec.Execute(t.ctx, code) + require.NoError(t, err) + require.NotEqual(t, compute.ResultTypeError, results.ResultType, results.Cause) + output, ok := results.Data.(string) + require.True(t, ok, "unexpected type %T", results.Data) + return output, nil +} diff --git a/internal/secrets_test.go b/internal/secrets_test.go index 1fdc48bdd..1e9c86abf 100644 --- a/internal/secrets_test.go +++ b/internal/secrets_test.go @@ -1,12 +1,98 @@ package internal import ( + "context" + "encoding/base64" + "fmt" "testing" + "github.com/databricks/cli/internal/acc" + "github.com/databricks/databricks-sdk-go/service/workspace" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestSecretsCreateScopeErrWhenNoArguments(t *testing.T) { _, _, err := RequireErrorRun(t, "secrets", "create-scope") assert.Equal(t, "accepts 1 arg(s), received 0", err.Error()) } + +func temporarySecretScope(ctx context.Context, t *acc.WorkspaceT) string { + scope := acc.RandomName("cli-acc-") + err := t.W.Secrets.CreateScope(ctx, workspace.CreateScope{ + Scope: scope, + }) + require.NoError(t, err) + + // Delete the scope after the test. 
+ t.Cleanup(func() { + err := t.W.Secrets.DeleteScopeByScope(ctx, scope) + require.NoError(t, err) + }) + + return scope +} + +func assertSecretStringValue(t *acc.WorkspaceT, scope, key, expected string) { + out, err := t.RunPython(fmt.Sprintf(` + import base64 + value = dbutils.secrets.get(scope="%s", key="%s") + encoded_value = base64.b64encode(value.encode('utf-8')) + print(encoded_value.decode('utf-8')) + `, scope, key)) + require.NoError(t, err) + + decoded, err := base64.StdEncoding.DecodeString(out) + require.NoError(t, err) + assert.Equal(t, expected, string(decoded)) +} + +func assertSecretBytesValue(t *acc.WorkspaceT, scope, key string, expected []byte) { + out, err := t.RunPython(fmt.Sprintf(` + import base64 + value = dbutils.secrets.getBytes(scope="%s", key="%s") + encoded_value = base64.b64encode(value) + print(encoded_value.decode('utf-8')) + `, scope, key)) + require.NoError(t, err) + + decoded, err := base64.StdEncoding.DecodeString(out) + require.NoError(t, err) + assert.Equal(t, expected, decoded) +} + +func TestSecretsPutSecretStringValue(tt *testing.T) { + ctx, t := acc.WorkspaceTest(tt) + scope := temporarySecretScope(ctx, t) + key := "test-key" + value := "test-value\nwith-newlines\n" + + stdout, stderr := RequireSuccessfulRun(t.T, "secrets", "put-secret", scope, key, "--string-value", value) + assert.Empty(t, stdout) + assert.Empty(t, stderr) + + assertSecretStringValue(t, scope, key, value) + assertSecretBytesValue(t, scope, key, []byte(value)) +} + +func TestSecretsPutSecretBytesValue(tt *testing.T) { + ctx, t := acc.WorkspaceTest(tt) + + if true { + // Uncomment below to run this test in isolation. + // To be addressed once none of the commands taint global state. + t.Skip("skipping because the test above clobbers global state") + } + + scope := temporarySecretScope(ctx, t) + key := "test-key" + value := []byte{0x00, 0x01, 0x02, 0x03} + + stdout, stderr := RequireSuccessfulRun(t.T, "secrets", "put-secret", scope, key, "--bytes-value", string(value)) + assert.Empty(t, stdout) + assert.Empty(t, stderr) + + // Note: this value cannot be represented as Python string, + // so we only check equality through the dbutils.secrets.getBytes API. + assertSecretBytesValue(t, scope, key, value) +} diff --git a/libs/cmdio/io.go b/libs/cmdio/io.go index 8b40294b9..a60231c09 100644 --- a/libs/cmdio/io.go +++ b/libs/cmdio/io.go @@ -174,18 +174,19 @@ func Select[V any](ctx context.Context, names map[string]V, label string) (id st return c.Select(stringNames, label) } -func (c *cmdIO) Secret() (value string, err error) { +func (c *cmdIO) Secret(label string) (value string, err error) { prompt := (promptui.Prompt{ - Label: "Enter your secrets value", - Mask: '*', + Label: label, + Mask: '*', + HideEntered: true, }) return prompt.Run() } -func Secret(ctx context.Context) (value string, err error) { +func Secret(ctx context.Context, label string) (value string, err error) { c := fromContext(ctx) - return c.Secret() + return c.Secret(label) } type nopWriteCloser struct { From 533234f14826444d186d222c27a6733dcc09da8d Mon Sep 17 00:00:00 2001 From: stikkireddy <54602805+stikkireddy@users.noreply.github.com> Date: Wed, 5 Jul 2023 15:58:06 -0400 Subject: [PATCH 02/18] Fix: bundle destroy fails when bundle.tf.json file is deleted (#519) ## Changes Adds the following steps to the destroy phase: 1. interpolate 2. write Resolves #518 ## Tests Tested manually due there not being an examples for tests to use. 
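For reference, a minimal sketch of the resulting destroy sequence (the constructor names come from the diff below; the surrounding `lock.Acquire()`/`bundle.Defer` bookkeeping is elided, and the helper name `destroyCore` is illustrative):

```go
package phases

import (
	"github.com/databricks/cli/bundle"
	"github.com/databricks/cli/bundle/deploy/terraform"
)

// destroyCore shows only the core ordering: the Terraform configuration is
// re-interpolated and rewritten before the plan/destroy steps run, so a
// deleted bundle.tf.json is regenerated instead of causing a failure.
func destroyCore() bundle.Mutator {
	return bundle.Seq(
		terraform.Interpolate(),
		terraform.Write(),
		terraform.StatePull(),
		terraform.Plan(terraform.PlanGoal("destroy")),
		terraform.Destroy(),
	)
}
```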
--- bundle/phases/destroy.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bundle/phases/destroy.go b/bundle/phases/destroy.go index b0ed5d627..5841916d1 100644 --- a/bundle/phases/destroy.go +++ b/bundle/phases/destroy.go @@ -14,6 +14,8 @@ func Destroy() bundle.Mutator { lock.Acquire(), bundle.Defer( bundle.Seq( + terraform.Interpolate(), + terraform.Write(), terraform.StatePull(), terraform.Plan(terraform.PlanGoal("destroy")), terraform.Destroy(), From 8eccc3442fa20553364c5e9c24543711792b410c Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Thu, 6 Jul 2023 11:59:53 +0200 Subject: [PATCH 03/18] Renamed method to HasRequiredNonBodyField (#553) ## Changes Renamed method to HasRequiredNonBodyField in line with https://github.com/databricks/databricks-sdk-go/pull/536 --- .codegen/service.go.tmpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.codegen/service.go.tmpl b/.codegen/service.go.tmpl index 64c80b9a8..76f4a94ee 100644 --- a/.codegen/service.go.tmpl +++ b/.codegen/service.go.tmpl @@ -44,7 +44,7 @@ var Cmd = &cobra.Command{ {{end}} // start {{.KebabName}} command -{{- $useJsonForAllFields := or .IsJsonOnly (and .Request (or (not .Request.IsAllRequiredFieldsPrimitive) .Request.IsAllRequiredFieldsJsonUnserialisable)) -}} +{{- $useJsonForAllFields := or .IsJsonOnly (and .Request (or (not .Request.IsAllRequiredFieldsPrimitive) .Request.HasRequiredNonBodyField)) -}} {{- $needJsonFlag := or $useJsonForAllFields (and .Request (not .Request.IsOnlyPrimitiveFields)) -}} {{- if .Request}} var {{.CamelName}}Req {{.Service.Package.Name}}.{{.Request.PascalName}} From 6f023f46d8f732129f8d11b29f136d1b8d7d87dc Mon Sep 17 00:00:00 2001 From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com> Date: Thu, 6 Jul 2023 13:16:14 +0200 Subject: [PATCH 04/18] Update cp help message to not require file scheme (#554) ## Tests Manually --------- Co-authored-by: Pieter Noordhuis --- cmd/fs/cp.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmd/fs/cp.go b/cmd/fs/cp.go index 399a1aea8..204d6c33c 100644 --- a/cmd/fs/cp.go +++ b/cmd/fs/cp.go @@ -132,8 +132,8 @@ var cpCmd = &cobra.Command{ Short: "Copy files and directories to and from DBFS.", Long: `Copy files to and from DBFS. - It is required that you specify the scheme "file" for local files and - "dbfs" for dbfs files. For example: file:/foo/bar, file:/c:/foo/bar or dbfs:/foo/bar. + For paths in DBFS it is required that you specify the "dbfs" scheme. + For example: dbfs:/foo/bar. Recursively copying a directory will copy all files inside directory at SOURCE_PATH to the directory at TARGET_PATH. From b14920cd12f263ef5bc974550b7fa1b32c689a24 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Fri, 7 Jul 2023 12:22:58 +0200 Subject: [PATCH 05/18] Fixed error reporting when included invalid files in include section (#543) ## Changes Fixed error reporting when included invalid files in include section Case 1. When the file to include is invalid, throw an error Case 2. 
When the file is loaded but the schema is wrong, indicate which file is failed to load ## Tests With non-existent notexists.yml ``` databricks bundle deploy Error: notexists.yml defined in 'include' section does not match any files ``` With malformed notexists.yml ``` databricks bundle deploy Error: failed to load /Users/andrew.nester/dabs/wheel/notexists.yml: error unmarshaling JSON: json: cannot unmarshal string into Go value of type config.Root ``` --- .../config/mutator/process_root_includes.go | 7 ++++ .../mutator/process_root_includes_test.go | 14 ++++++++ bundle/config/root.go | 2 +- bundle/tests/include_invalid/bundle.yml | 5 +++ bundle/tests/include_test.go | 34 +++++++++++++++++++ bundle/tests/include_with_glob/bundle.yml | 7 ++++ bundle/tests/include_with_glob/job.yml | 4 +++ 7 files changed, 72 insertions(+), 1 deletion(-) create mode 100644 bundle/tests/include_invalid/bundle.yml create mode 100644 bundle/tests/include_test.go create mode 100644 bundle/tests/include_with_glob/bundle.yml create mode 100644 bundle/tests/include_with_glob/job.yml diff --git a/bundle/config/mutator/process_root_includes.go b/bundle/config/mutator/process_root_includes.go index ef055674a..454e3a987 100644 --- a/bundle/config/mutator/process_root_includes.go +++ b/bundle/config/mutator/process_root_includes.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "path/filepath" + "strings" "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config" @@ -49,6 +50,12 @@ func (m *processRootIncludes) Apply(ctx context.Context, b *bundle.Bundle) error return err } + // If the entry is not a glob pattern and no matches found, + // return an error because the file defined is not found + if len(matches) == 0 && !strings.ContainsAny(entry, "*?[") { + return fmt.Errorf("%s defined in 'include' section does not match any files", entry) + } + // Filter matches to ones we haven't seen yet. 
var includes []string for _, match := range matches { diff --git a/bundle/config/mutator/process_root_includes_test.go b/bundle/config/mutator/process_root_includes_test.go index 1e7f6d1a2..c7d00d88b 100644 --- a/bundle/config/mutator/process_root_includes_test.go +++ b/bundle/config/mutator/process_root_includes_test.go @@ -108,3 +108,17 @@ func TestProcessRootIncludesRemoveDups(t *testing.T) { require.NoError(t, err) assert.Equal(t, []string{"a.yml"}, bundle.Config.Include) } + +func TestProcessRootIncludesNotExists(t *testing.T) { + bundle := &bundle.Bundle{ + Config: config.Root{ + Path: t.TempDir(), + Include: []string{ + "notexist.yml", + }, + }, + } + err := mutator.ProcessRootIncludes().Apply(context.Background(), bundle) + require.Error(t, err) + assert.Contains(t, err.Error(), "notexist.yml defined in 'include' section does not match any files") +} diff --git a/bundle/config/root.go b/bundle/config/root.go index 28333e988..8e8325733 100644 --- a/bundle/config/root.go +++ b/bundle/config/root.go @@ -118,7 +118,7 @@ func (r *Root) Load(path string) error { } err = yaml.Unmarshal(raw, r) if err != nil { - return err + return fmt.Errorf("failed to load %s: %w", path, err) } r.Path = filepath.Dir(path) diff --git a/bundle/tests/include_invalid/bundle.yml b/bundle/tests/include_invalid/bundle.yml new file mode 100644 index 000000000..f59e2ae0a --- /dev/null +++ b/bundle/tests/include_invalid/bundle.yml @@ -0,0 +1,5 @@ +bundle: + name: include_invalid + +include: + - notexists.yml diff --git a/bundle/tests/include_test.go b/bundle/tests/include_test.go new file mode 100644 index 000000000..d704b8380 --- /dev/null +++ b/bundle/tests/include_test.go @@ -0,0 +1,34 @@ +package config_tests + +import ( + "context" + "path/filepath" + "sort" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config/mutator" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "golang.org/x/exp/maps" +) + +func TestIncludeInvalid(t *testing.T) { + b, err := bundle.Load("./include_invalid") + require.NoError(t, err) + err = bundle.Apply(context.Background(), b, bundle.Seq(mutator.DefaultMutators()...)) + require.Error(t, err) + assert.Contains(t, err.Error(), "notexists.yml defined in 'include' section does not match any files") +} + +func TestIncludeWithGlob(t *testing.T) { + b := load(t, "./include_with_glob") + + keys := maps.Keys(b.Config.Resources.Jobs) + sort.Strings(keys) + assert.Equal(t, []string{"my_job"}, keys) + + job := b.Config.Resources.Jobs["my_job"] + assert.Equal(t, "1", job.ID) + assert.Equal(t, "include_with_glob/job.yml", filepath.ToSlash(job.ConfigFilePath)) +} diff --git a/bundle/tests/include_with_glob/bundle.yml b/bundle/tests/include_with_glob/bundle.yml new file mode 100644 index 000000000..b1d078f9c --- /dev/null +++ b/bundle/tests/include_with_glob/bundle.yml @@ -0,0 +1,7 @@ +bundle: + name: include_with_glob + +include: + - "*.yml" + - "?.yml" + - "[a-z].yml" diff --git a/bundle/tests/include_with_glob/job.yml b/bundle/tests/include_with_glob/job.yml new file mode 100644 index 000000000..3d609c529 --- /dev/null +++ b/bundle/tests/include_with_glob/job.yml @@ -0,0 +1,4 @@ +resources: + jobs: + my_job: + id: 1 From 179154477e1384db3e0bb8bc3aa3e3ee653eb334 Mon Sep 17 00:00:00 2001 From: Gleb Kanterov Date: Fri, 7 Jul 2023 13:20:37 +0200 Subject: [PATCH 06/18] Propagate TF_CLI_CONFIG_FILE env variable (#555) ## Changes Propagate `TF_CLI_CONFIG_FILE` env variable. 
From Terraform documentation: > The location of the Terraform CLI configuration file can also be specified using the TF_CLI_CONFIG_FILE [environment variable](https://developer.hashicorp.com/terraform/cli/config/environment-variables) It allows using custom builds of terraform-provider-databricks, using config files like: ```tf provider_installation { dev_overrides { "databricks/databricks" = "/Users/gleb.kanterov/terraform-provider-databricks" } direct {} } ``` ## Tests I added unit tests. --- bundle/deploy/terraform/init.go | 24 ++++++++++++++++++++---- bundle/deploy/terraform/init_test.go | 16 ++++++++++++++++ 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/bundle/deploy/terraform/init.go b/bundle/deploy/terraform/init.go index 0ce15acbb..eb3e99d18 100644 --- a/bundle/deploy/terraform/init.go +++ b/bundle/deploy/terraform/init.go @@ -70,6 +70,23 @@ func (m *initialize) findExecPath(ctx context.Context, b *bundle.Bundle, tf *con return tf.ExecPath, nil } +// This function inherits some environment variables for Terraform CLI. +func inheritEnvVars(env map[string]string) error { + // Include $HOME in set of environment variables to pass along. + home, ok := os.LookupEnv("HOME") + if ok { + env["HOME"] = home + } + + // Include $TF_CLI_CONFIG_FILE to override terraform provider in development. + configFile, ok := os.LookupEnv("TF_CLI_CONFIG_FILE") + if ok { + env["TF_CLI_CONFIG_FILE"] = configFile + } + + return nil +} + // This function sets temp dir location for terraform to use. If user does not // specify anything here, we fall back to a `tmp` directory in the bundle's cache // directory @@ -145,10 +162,9 @@ func (m *initialize) Apply(ctx context.Context, b *bundle.Bundle) error { return err } - // Include $HOME in set of environment variables to pass along. - home, ok := os.LookupEnv("HOME") - if ok { - env["HOME"] = home + err = inheritEnvVars(env) + if err != nil { + return err } // Set the temporary directory environment variables diff --git a/bundle/deploy/terraform/init_test.go b/bundle/deploy/terraform/init_test.go index aafe5660c..79e18170e 100644 --- a/bundle/deploy/terraform/init_test.go +++ b/bundle/deploy/terraform/init_test.go @@ -272,3 +272,19 @@ func TestSetProxyEnvVars(t *testing.T) { require.NoError(t, err) assert.ElementsMatch(t, []string{"HTTP_PROXY", "HTTPS_PROXY", "NO_PROXY"}, maps.Keys(env)) } + +func TestInheritEnvVars(t *testing.T) { + env := map[string]string{} + + t.Setenv("HOME", "/home/testuser") + t.Setenv("TF_CLI_CONFIG_FILE", "/tmp/config.tfrc") + + err := inheritEnvVars(env) + + require.NoError(t, err) + + require.Equal(t, map[string]string{ + "HOME": "/home/testuser", + "TF_CLI_CONFIG_FILE": "/tmp/config.tfrc", + }, env) +} From b6665f4b309b09fdb8532796db7e0491d5386973 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Fri, 7 Jul 2023 14:52:41 +0200 Subject: [PATCH 07/18] Update Terraform provider schema structs (#563) ## Changes Generated from 47857a63c7242fc43aba833cdd28b222fd25c399 (next release after 1.20). ## Tests The change is additive and unit tests pass. 
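As an illustration of how the regenerated structs are consumed, they marshal directly into Terraform JSON configuration. The sketch below registers the newly added `databricks_catalog_workspace_binding` resource (the field values are made up, and it assumes the snippet is compiled inside the `bundle` tree of this repository so the internal `schema` package resolves):

```go
package main

import (
	"encoding/json"
	"fmt"

	"github.com/databricks/cli/bundle/internal/tf/schema"
)

func main() {
	// Register a single catalog workspace binding and print the Terraform
	// JSON it serializes to. Field values are placeholders.
	resources := schema.NewResources()
	resources.CatalogWorkspaceBinding["example"] = &schema.ResourceCatalogWorkspaceBinding{
		CatalogName: "main",
		WorkspaceId: "1234567890",
	}
	out, err := json.MarshalIndent(resources, "", "  ")
	if err != nil {
		panic(err)
	}
	fmt.Println(string(out))
}
```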
--- bundle/internal/tf/schema/config.go | 2 + bundle/internal/tf/schema/data_source_job.go | 46 ++++++++++++++++--- .../resource_catalog_workspace_binding.go | 9 ++++ bundle/internal/tf/schema/resource_grants.go | 1 + bundle/internal/tf/schema/resource_job.go | 46 ++++++++++++++++--- .../tf/schema/resource_model_serving.go | 11 +++-- .../tf/schema/resource_service_principal.go | 1 + .../tf/schema/resource_sql_global_config.go | 1 + bundle/internal/tf/schema/resource_user.go | 1 + bundle/internal/tf/schema/resources.go | 2 + 10 files changed, 103 insertions(+), 17 deletions(-) create mode 100644 bundle/internal/tf/schema/resource_catalog_workspace_binding.go diff --git a/bundle/internal/tf/schema/config.go b/bundle/internal/tf/schema/config.go index 0e5ec3fab..d24d57339 100644 --- a/bundle/internal/tf/schema/config.go +++ b/bundle/internal/tf/schema/config.go @@ -14,6 +14,7 @@ type Config struct { AzureWorkspaceResourceId string `json:"azure_workspace_resource_id,omitempty"` ClientId string `json:"client_id,omitempty"` ClientSecret string `json:"client_secret,omitempty"` + ClusterId string `json:"cluster_id,omitempty"` ConfigFile string `json:"config_file,omitempty"` DatabricksCliPath string `json:"databricks_cli_path,omitempty"` DebugHeaders bool `json:"debug_headers,omitempty"` @@ -30,4 +31,5 @@ type Config struct { SkipVerify bool `json:"skip_verify,omitempty"` Token string `json:"token,omitempty"` Username string `json:"username,omitempty"` + WarehouseId string `json:"warehouse_id,omitempty"` } diff --git a/bundle/internal/tf/schema/data_source_job.go b/bundle/internal/tf/schema/data_source_job.go index ec0aa6f57..a633bd3a3 100644 --- a/bundle/internal/tf/schema/data_source_job.go +++ b/bundle/internal/tf/schema/data_source_job.go @@ -2,6 +2,15 @@ package schema +type DataSourceJobJobSettingsSettingsComputeSpec struct { + Kind string `json:"kind,omitempty"` +} + +type DataSourceJobJobSettingsSettingsCompute struct { + ComputeKey string `json:"compute_key,omitempty"` + Spec *DataSourceJobJobSettingsSettingsComputeSpec `json:"spec,omitempty"` +} + type DataSourceJobJobSettingsSettingsContinuous struct { PauseStatus string `json:"pause_status,omitempty"` } @@ -415,6 +424,12 @@ type DataSourceJobJobSettingsSettingsSparkSubmitTask struct { Parameters []string `json:"parameters,omitempty"` } +type DataSourceJobJobSettingsSettingsTaskConditionTask struct { + Left string `json:"left,omitempty"` + Op string `json:"op,omitempty"` + Right string `json:"right,omitempty"` +} + type DataSourceJobJobSettingsSettingsTaskDbtTask struct { Catalog string `json:"catalog,omitempty"` Commands []string `json:"commands"` @@ -425,7 +440,8 @@ type DataSourceJobJobSettingsSettingsTaskDbtTask struct { } type DataSourceJobJobSettingsSettingsTaskDependsOn struct { - TaskKey string `json:"task_key,omitempty"` + Outcome string `json:"outcome,omitempty"` + TaskKey string `json:"task_key"` } type DataSourceJobJobSettingsSettingsTaskEmailNotifications struct { @@ -645,12 +661,27 @@ type DataSourceJobJobSettingsSettingsTaskSparkSubmitTask struct { Parameters []string `json:"parameters,omitempty"` } +type DataSourceJobJobSettingsSettingsTaskSqlTaskAlertSubscriptions struct { + DestinationId string `json:"destination_id,omitempty"` + UserName string `json:"user_name,omitempty"` +} + type DataSourceJobJobSettingsSettingsTaskSqlTaskAlert struct { - AlertId string `json:"alert_id"` + AlertId string `json:"alert_id"` + PauseSubscriptions bool `json:"pause_subscriptions,omitempty"` + Subscriptions 
[]DataSourceJobJobSettingsSettingsTaskSqlTaskAlertSubscriptions `json:"subscriptions,omitempty"` +} + +type DataSourceJobJobSettingsSettingsTaskSqlTaskDashboardSubscriptions struct { + DestinationId string `json:"destination_id,omitempty"` + UserName string `json:"user_name,omitempty"` } type DataSourceJobJobSettingsSettingsTaskSqlTaskDashboard struct { - DashboardId string `json:"dashboard_id"` + CustomSubject string `json:"custom_subject,omitempty"` + DashboardId string `json:"dashboard_id"` + PauseSubscriptions bool `json:"pause_subscriptions,omitempty"` + Subscriptions []DataSourceJobJobSettingsSettingsTaskSqlTaskDashboardSubscriptions `json:"subscriptions,omitempty"` } type DataSourceJobJobSettingsSettingsTaskSqlTaskFile struct { @@ -671,6 +702,7 @@ type DataSourceJobJobSettingsSettingsTaskSqlTask struct { } type DataSourceJobJobSettingsSettingsTask struct { + ComputeKey string `json:"compute_key,omitempty"` Description string `json:"description,omitempty"` ExistingClusterId string `json:"existing_cluster_id,omitempty"` JobClusterKey string `json:"job_cluster_key,omitempty"` @@ -680,6 +712,7 @@ type DataSourceJobJobSettingsSettingsTask struct { RunIf string `json:"run_if,omitempty"` TaskKey string `json:"task_key,omitempty"` TimeoutSeconds int `json:"timeout_seconds,omitempty"` + ConditionTask *DataSourceJobJobSettingsSettingsTaskConditionTask `json:"condition_task,omitempty"` DbtTask *DataSourceJobJobSettingsSettingsTaskDbtTask `json:"dbt_task,omitempty"` DependsOn []DataSourceJobJobSettingsSettingsTaskDependsOn `json:"depends_on,omitempty"` EmailNotifications *DataSourceJobJobSettingsSettingsTaskEmailNotifications `json:"email_notifications,omitempty"` @@ -695,9 +728,9 @@ type DataSourceJobJobSettingsSettingsTask struct { } type DataSourceJobJobSettingsSettingsTriggerFileArrival struct { - MinTimeBetweenTriggerSeconds int `json:"min_time_between_trigger_seconds,omitempty"` - Url string `json:"url"` - WaitAfterLastChangeSeconds int `json:"wait_after_last_change_seconds,omitempty"` + MinTimeBetweenTriggersSeconds int `json:"min_time_between_triggers_seconds,omitempty"` + Url string `json:"url"` + WaitAfterLastChangeSeconds int `json:"wait_after_last_change_seconds,omitempty"` } type DataSourceJobJobSettingsSettingsTrigger struct { @@ -733,6 +766,7 @@ type DataSourceJobJobSettingsSettings struct { RetryOnTimeout bool `json:"retry_on_timeout,omitempty"` Tags map[string]string `json:"tags,omitempty"` TimeoutSeconds int `json:"timeout_seconds,omitempty"` + Compute []DataSourceJobJobSettingsSettingsCompute `json:"compute,omitempty"` Continuous *DataSourceJobJobSettingsSettingsContinuous `json:"continuous,omitempty"` DbtTask *DataSourceJobJobSettingsSettingsDbtTask `json:"dbt_task,omitempty"` EmailNotifications *DataSourceJobJobSettingsSettingsEmailNotifications `json:"email_notifications,omitempty"` diff --git a/bundle/internal/tf/schema/resource_catalog_workspace_binding.go b/bundle/internal/tf/schema/resource_catalog_workspace_binding.go new file mode 100644 index 000000000..40bced985 --- /dev/null +++ b/bundle/internal/tf/schema/resource_catalog_workspace_binding.go @@ -0,0 +1,9 @@ +// Generated from Databricks Terraform provider schema. DO NOT EDIT. 
+ +package schema + +type ResourceCatalogWorkspaceBinding struct { + CatalogName string `json:"catalog_name"` + Id string `json:"id,omitempty"` + WorkspaceId string `json:"workspace_id"` +} diff --git a/bundle/internal/tf/schema/resource_grants.go b/bundle/internal/tf/schema/resource_grants.go index c19adae85..fb754cc62 100644 --- a/bundle/internal/tf/schema/resource_grants.go +++ b/bundle/internal/tf/schema/resource_grants.go @@ -19,5 +19,6 @@ type ResourceGrants struct { StorageCredential string `json:"storage_credential,omitempty"` Table string `json:"table,omitempty"` View string `json:"view,omitempty"` + Volume string `json:"volume,omitempty"` Grant []ResourceGrantsGrant `json:"grant,omitempty"` } diff --git a/bundle/internal/tf/schema/resource_job.go b/bundle/internal/tf/schema/resource_job.go index aea2fdd6c..e3137ea15 100644 --- a/bundle/internal/tf/schema/resource_job.go +++ b/bundle/internal/tf/schema/resource_job.go @@ -2,6 +2,15 @@ package schema +type ResourceJobComputeSpec struct { + Kind string `json:"kind,omitempty"` +} + +type ResourceJobCompute struct { + ComputeKey string `json:"compute_key,omitempty"` + Spec *ResourceJobComputeSpec `json:"spec,omitempty"` +} + type ResourceJobContinuous struct { PauseStatus string `json:"pause_status,omitempty"` } @@ -415,6 +424,12 @@ type ResourceJobSparkSubmitTask struct { Parameters []string `json:"parameters,omitempty"` } +type ResourceJobTaskConditionTask struct { + Left string `json:"left,omitempty"` + Op string `json:"op,omitempty"` + Right string `json:"right,omitempty"` +} + type ResourceJobTaskDbtTask struct { Catalog string `json:"catalog,omitempty"` Commands []string `json:"commands"` @@ -425,7 +440,8 @@ type ResourceJobTaskDbtTask struct { } type ResourceJobTaskDependsOn struct { - TaskKey string `json:"task_key,omitempty"` + Outcome string `json:"outcome,omitempty"` + TaskKey string `json:"task_key"` } type ResourceJobTaskEmailNotifications struct { @@ -645,12 +661,27 @@ type ResourceJobTaskSparkSubmitTask struct { Parameters []string `json:"parameters,omitempty"` } +type ResourceJobTaskSqlTaskAlertSubscriptions struct { + DestinationId string `json:"destination_id,omitempty"` + UserName string `json:"user_name,omitempty"` +} + type ResourceJobTaskSqlTaskAlert struct { - AlertId string `json:"alert_id"` + AlertId string `json:"alert_id"` + PauseSubscriptions bool `json:"pause_subscriptions,omitempty"` + Subscriptions []ResourceJobTaskSqlTaskAlertSubscriptions `json:"subscriptions,omitempty"` +} + +type ResourceJobTaskSqlTaskDashboardSubscriptions struct { + DestinationId string `json:"destination_id,omitempty"` + UserName string `json:"user_name,omitempty"` } type ResourceJobTaskSqlTaskDashboard struct { - DashboardId string `json:"dashboard_id"` + CustomSubject string `json:"custom_subject,omitempty"` + DashboardId string `json:"dashboard_id"` + PauseSubscriptions bool `json:"pause_subscriptions,omitempty"` + Subscriptions []ResourceJobTaskSqlTaskDashboardSubscriptions `json:"subscriptions,omitempty"` } type ResourceJobTaskSqlTaskFile struct { @@ -671,6 +702,7 @@ type ResourceJobTaskSqlTask struct { } type ResourceJobTask struct { + ComputeKey string `json:"compute_key,omitempty"` Description string `json:"description,omitempty"` ExistingClusterId string `json:"existing_cluster_id,omitempty"` JobClusterKey string `json:"job_cluster_key,omitempty"` @@ -680,6 +712,7 @@ type ResourceJobTask struct { RunIf string `json:"run_if,omitempty"` TaskKey string `json:"task_key,omitempty"` TimeoutSeconds int 
`json:"timeout_seconds,omitempty"` + ConditionTask *ResourceJobTaskConditionTask `json:"condition_task,omitempty"` DbtTask *ResourceJobTaskDbtTask `json:"dbt_task,omitempty"` DependsOn []ResourceJobTaskDependsOn `json:"depends_on,omitempty"` EmailNotifications *ResourceJobTaskEmailNotifications `json:"email_notifications,omitempty"` @@ -695,9 +728,9 @@ type ResourceJobTask struct { } type ResourceJobTriggerFileArrival struct { - MinTimeBetweenTriggerSeconds int `json:"min_time_between_trigger_seconds,omitempty"` - Url string `json:"url"` - WaitAfterLastChangeSeconds int `json:"wait_after_last_change_seconds,omitempty"` + MinTimeBetweenTriggersSeconds int `json:"min_time_between_triggers_seconds,omitempty"` + Url string `json:"url"` + WaitAfterLastChangeSeconds int `json:"wait_after_last_change_seconds,omitempty"` } type ResourceJobTrigger struct { @@ -736,6 +769,7 @@ type ResourceJob struct { Tags map[string]string `json:"tags,omitempty"` TimeoutSeconds int `json:"timeout_seconds,omitempty"` Url string `json:"url,omitempty"` + Compute []ResourceJobCompute `json:"compute,omitempty"` Continuous *ResourceJobContinuous `json:"continuous,omitempty"` DbtTask *ResourceJobDbtTask `json:"dbt_task,omitempty"` EmailNotifications *ResourceJobEmailNotifications `json:"email_notifications,omitempty"` diff --git a/bundle/internal/tf/schema/resource_model_serving.go b/bundle/internal/tf/schema/resource_model_serving.go index 6d5ff5391..b7ff88ccd 100644 --- a/bundle/internal/tf/schema/resource_model_serving.go +++ b/bundle/internal/tf/schema/resource_model_serving.go @@ -3,11 +3,12 @@ package schema type ResourceModelServingConfigServedModels struct { - ModelName string `json:"model_name"` - ModelVersion string `json:"model_version"` - Name string `json:"name,omitempty"` - ScaleToZeroEnabled bool `json:"scale_to_zero_enabled,omitempty"` - WorkloadSize string `json:"workload_size"` + EnvironmentVars map[string]string `json:"environment_vars,omitempty"` + ModelName string `json:"model_name"` + ModelVersion string `json:"model_version"` + Name string `json:"name,omitempty"` + ScaleToZeroEnabled bool `json:"scale_to_zero_enabled,omitempty"` + WorkloadSize string `json:"workload_size"` } type ResourceModelServingConfigTrafficConfigRoutes struct { diff --git a/bundle/internal/tf/schema/resource_service_principal.go b/bundle/internal/tf/schema/resource_service_principal.go index 86e17a99f..bdbce2278 100644 --- a/bundle/internal/tf/schema/resource_service_principal.go +++ b/bundle/internal/tf/schema/resource_service_principal.go @@ -8,6 +8,7 @@ type ResourceServicePrincipal struct { AllowInstancePoolCreate bool `json:"allow_instance_pool_create,omitempty"` ApplicationId string `json:"application_id,omitempty"` DatabricksSqlAccess bool `json:"databricks_sql_access,omitempty"` + DisableAsUserDeletion bool `json:"disable_as_user_deletion,omitempty"` DisplayName string `json:"display_name,omitempty"` ExternalId string `json:"external_id,omitempty"` Force bool `json:"force,omitempty"` diff --git a/bundle/internal/tf/schema/resource_sql_global_config.go b/bundle/internal/tf/schema/resource_sql_global_config.go index fbc5f0eca..62eddceed 100644 --- a/bundle/internal/tf/schema/resource_sql_global_config.go +++ b/bundle/internal/tf/schema/resource_sql_global_config.go @@ -5,6 +5,7 @@ package schema type ResourceSqlGlobalConfig struct { DataAccessConfig map[string]string `json:"data_access_config,omitempty"` EnableServerlessCompute bool `json:"enable_serverless_compute,omitempty"` + GoogleServiceAccount string 
`json:"google_service_account,omitempty"` Id string `json:"id,omitempty"` InstanceProfileArn string `json:"instance_profile_arn,omitempty"` SecurityPolicy string `json:"security_policy,omitempty"` diff --git a/bundle/internal/tf/schema/resource_user.go b/bundle/internal/tf/schema/resource_user.go index 9dd34c97d..b96440934 100644 --- a/bundle/internal/tf/schema/resource_user.go +++ b/bundle/internal/tf/schema/resource_user.go @@ -7,6 +7,7 @@ type ResourceUser struct { AllowClusterCreate bool `json:"allow_cluster_create,omitempty"` AllowInstancePoolCreate bool `json:"allow_instance_pool_create,omitempty"` DatabricksSqlAccess bool `json:"databricks_sql_access,omitempty"` + DisableAsUserDeletion bool `json:"disable_as_user_deletion,omitempty"` DisplayName string `json:"display_name,omitempty"` ExternalId string `json:"external_id,omitempty"` Force bool `json:"force,omitempty"` diff --git a/bundle/internal/tf/schema/resources.go b/bundle/internal/tf/schema/resources.go index 0620bd039..7a0c2eb8b 100644 --- a/bundle/internal/tf/schema/resources.go +++ b/bundle/internal/tf/schema/resources.go @@ -8,6 +8,7 @@ type Resources struct { AzureAdlsGen2Mount map[string]*ResourceAzureAdlsGen2Mount `json:"databricks_azure_adls_gen2_mount,omitempty"` AzureBlobMount map[string]*ResourceAzureBlobMount `json:"databricks_azure_blob_mount,omitempty"` Catalog map[string]*ResourceCatalog `json:"databricks_catalog,omitempty"` + CatalogWorkspaceBinding map[string]*ResourceCatalogWorkspaceBinding `json:"databricks_catalog_workspace_binding,omitempty"` Cluster map[string]*ResourceCluster `json:"databricks_cluster,omitempty"` ClusterPolicy map[string]*ResourceClusterPolicy `json:"databricks_cluster_policy,omitempty"` DbfsFile map[string]*ResourceDbfsFile `json:"databricks_dbfs_file,omitempty"` @@ -86,6 +87,7 @@ func NewResources() *Resources { AzureAdlsGen2Mount: make(map[string]*ResourceAzureAdlsGen2Mount), AzureBlobMount: make(map[string]*ResourceAzureBlobMount), Catalog: make(map[string]*ResourceCatalog), + CatalogWorkspaceBinding: make(map[string]*ResourceCatalogWorkspaceBinding), Cluster: make(map[string]*ResourceCluster), ClusterPolicy: make(map[string]*ResourceClusterPolicy), DbfsFile: make(map[string]*ResourceDbfsFile), From 057f3288791e3d9e246ee429aa2cf7986351fe5e Mon Sep 17 00:00:00 2001 From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com> Date: Fri, 7 Jul 2023 15:00:12 +0200 Subject: [PATCH 08/18] Update inline JSON schema documentation (#557) ## Changes Add docs for experiments and models to the json schema. Update the schema to the latest openapi spec. ## Tests Manually --- bundle/schema/docs/bundle_descriptions.json | 862 ++++++++++++++++---- bundle/schema/openapi.go | 46 +- 2 files changed, 755 insertions(+), 153 deletions(-) diff --git a/bundle/schema/docs/bundle_descriptions.json b/bundle/schema/docs/bundle_descriptions.json index ea7804186..7734614ec 100644 --- a/bundle/schema/docs/bundle_descriptions.json +++ b/bundle/schema/docs/bundle_descriptions.json @@ -20,6 +20,17 @@ "bundle": { "description": "The details for this bundle.", "properties": { + "git": { + "description": "", + "properties": { + "branch": { + "description": "" + }, + "origin_url": { + "description": "" + } + } + }, "name": { "description": "The name of the bundle." 
} @@ -49,6 +60,17 @@ "bundle": { "description": "The details for this bundle.", "properties": { + "git": { + "description": "", + "properties": { + "branch": { + "description": "" + }, + "origin_url": { + "description": "" + } + } + }, "name": { "description": "The name of the bundle." } @@ -66,22 +88,22 @@ "description": "", "properties": { "artifact_location": { - "description": "" + "description": "Location where artifacts for the experiment are stored." }, "creation_time": { - "description": "" + "description": "Creation time" }, "experiment_id": { - "description": "" + "description": "Unique identifier for the experiment." }, "last_update_time": { - "description": "" + "description": "Last update time" }, "lifecycle_stage": { - "description": "" + "description": "Current life cycle stage of the experiment: \"active\" or \"deleted\".\nDeleted experiments are not returned by APIs." }, "name": { - "description": "" + "description": "Human readable name that identifies the experiment." }, "permissions": { "description": "", @@ -104,15 +126,15 @@ } }, "tags": { - "description": "", + "description": "Tags: Additional metadata key-value pairs.", "items": { "description": "", "properties": { "key": { - "description": "" + "description": "The tag key." }, "value": { - "description": "" + "description": "The tag value." } } } @@ -121,15 +143,34 @@ } }, "jobs": { - "description": "List of job definations", + "description": "List of Databricks jobs", "additionalproperties": { "description": "", "properties": { + "compute": { + "description": "A list of compute requirements that can be referenced by tasks of this job.", + "items": { + "description": "", + "properties": { + "compute_key": { + "description": "A unique name for the compute requirement. This field is required and must be unique within the job.\n`JobTaskSettings` may refer to this field to determine the compute requirements for the task execution." + }, + "spec": { + "description": "", + "properties": { + "kind": { + "description": "The kind of compute described by this compute specification." + } + } + } + } + } + }, "continuous": { - "description": "", + "description": "An optional continuous property for this job. The continuous property will ensure that there is always one run executing. Only one of `schedule` and `continuous` can be used.", "properties": { "pause_status": { - "description": "" + "description": "Whether this trigger is paused or not." } } }, @@ -140,7 +181,7 @@ "description": "If true, do not send email to recipients specified in `on_failure` if the run is skipped." }, "on_failure": { - "description": "A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `SKIPPED`, `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent.", + "description": "A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent.", "items": { "description": "" } @@ -152,7 +193,7 @@ } }, "on_success": { - "description": "A list of email addresses to be notified when a run successfully completes. 
A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESSFUL` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", + "description": "A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", "items": { "description": "" } @@ -199,7 +240,7 @@ "description": "A unique name for the job cluster. This field is required and must be unique within the job.\n`JobTaskSettings` may refer to this field to determine which cluster to launch for the task execution." }, "new_cluster": { - "description": "If new_cluster, a description of a cluster that is created for each task.", + "description": "If new_cluster, a description of a cluster that is created for only for this task.", "properties": { "autoscale": { "description": "Parameters needed in order to automatically scale clusters up and down based on load.\nNote: autoscaling works best with DB runtime versions 3.0 or later.", @@ -287,7 +328,7 @@ } }, "s3": { - "description": "destination and either region or endpoint should also be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", + "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", "properties": { "canned_acl": { "description": "(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." @@ -326,6 +367,28 @@ "description": "" } }, + "data_security_mode": { + "description": "" + }, + "docker_image": { + "description": "", + "properties": { + "basic_auth": { + "description": "", + "properties": { + "password": { + "description": "Password of the user" + }, + "username": { + "description": "Name of the user" + } + } + }, + "url": { + "description": "URL of the docker image." + } + } + }, "driver_instance_pool_id": { "description": "The optional ID of the instance pool for the driver of the cluster belongs.\nThe pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not\nassigned." }, @@ -349,6 +412,59 @@ }, "google_service_account": { "description": "If provided, the cluster will impersonate the google service account when accessing\ngcloud services (like GCS). The google service account\nmust have previously been added to the Databricks environment by an account\nadministrator." 
+ }, + "local_ssd_count": { + "description": "If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type." + } + } + }, + "init_scripts": { + "description": "The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `\u003cdestination\u003e/\u003ccluster-ID\u003e/init_scripts`.", + "items": { + "description": "", + "properties": { + "dbfs": { + "description": "destination needs to be provided. e.g.\n`{ \"dbfs\" : { \"destination\" : \"dbfs:/home/cluster_log\" } }`", + "properties": { + "destination": { + "description": "dbfs destination, e.g. `dbfs:/my/path`" + } + } + }, + "s3": { + "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", + "properties": { + "canned_acl": { + "description": "(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." + }, + "destination": { + "description": "S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using\ncluster iam role, please make sure you set cluster iam role and the role has write access to the\ndestination. Please also note that you cannot use AWS keys to deliver logs." + }, + "enable_encryption": { + "description": "(Optional) Flag to enable server side encryption, `false` by default." + }, + "encryption_type": { + "description": "(Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when\nencryption is enabled and the default type is `sse-s3`." + }, + "endpoint": { + "description": "S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set.\nIf both are set, endpoint will be used." + }, + "kms_key": { + "description": "(Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`." + }, + "region": { + "description": "S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set,\nendpoint will be used." + } + } + }, + "workspace": { + "description": "destination needs to be provided. e.g.\n`{ \"workspace\" : { \"destination\" : \"/Users/user1@databricks.com/my-init.sh\" } }`", + "properties": { + "destination": { + "description": "workspace files destination, e.g. 
`/Users/user1@databricks.com/my-init.sh`" + } + } + } } } }, @@ -367,6 +483,9 @@ "runtime_engine": { "description": "" }, + "single_user_name": { + "description": "Single user name if data_security_mode is `SINGLE_USER`" + }, "spark_conf": { "description": "An object containing a set of optional, user-specified Spark configuration key-value pairs.\nUsers can also pass in a string of extra JVM options to the driver and the executors via\n`spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively.\n", "additionalproperties": { @@ -415,6 +534,31 @@ "name": { "description": "An optional name for the job." }, + "notification_settings": { + "description": "Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this job.", + "properties": { + "no_alert_for_canceled_runs": { + "description": "If true, do not send notifications to recipients specified in `on_failure` if the run is canceled." + }, + "no_alert_for_skipped_runs": { + "description": "If true, do not send notifications to recipients specified in `on_failure` if the run is skipped." + } + } + }, + "parameters": { + "description": "Job-level parameter definitions", + "items": { + "description": "", + "properties": { + "default": { + "description": "Default value of the parameter." + }, + "name": { + "description": "The name of the defined parameter. May only contain alphanumeric characters, `_`, `-`, and `.`" + } + } + } + }, "permissions": { "description": "", "items": { @@ -435,11 +579,22 @@ } } }, + "run_as": { + "description": "", + "properties": { + "service_principal_name": { + "description": "Application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role." + }, + "user_name": { + "description": "The email of an active workspace user. Non-admin users can only set this field to their own email." + } + } + }, "schedule": { "description": "An optional periodic schedule for this job. The default behavior is that the job only runs when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`.", "properties": { "pause_status": { - "description": "Indicate whether this schedule is paused or not." + "description": "Whether this trigger is paused or not." }, "quartz_cron_expression": { "description": "A Cron expression using Quartz syntax that describes the schedule for a job.\nSee [Cron Trigger](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html)\nfor details. This field is required.\"\n" @@ -460,6 +615,23 @@ "items": { "description": "", "properties": { + "compute_key": { + "description": "The key of the compute requirement, specified in `job.settings.compute`, to use for execution of this task." + }, + "condition_task": { + "description": "If condition_task, specifies a condition with an outcome that can be used to control the execution of other tasks. Does not require a cluster to execute and does not support retries or notifications.", + "properties": { + "left": { + "description": "The left operand of the condition task. Can be either a string value or a job state or parameter reference." + }, + "op": { + "description": "* `EQUAL_TO`, `NOT_EQUAL` operators perform string comparison of their operands. This means that `“12.0” == “12”` will evaluate to `false`.\n* `GREATER_THAN`, `GREATER_THAN_OR_EQUAL`, `LESS_THAN`, `LESS_THAN_OR_EQUAL` operators perform numeric comparison of their operands. 
`“12.0” \u003e= “12”` will evaluate to `true`, `“10.0” \u003e= “12”` will evaluate to `false`.\n\nThe boolean comparison to task values can be implemented with operators `EQUAL_TO`, `NOT_EQUAL`. If a task value was set to a boolean value, it will be serialized to `“true”` or `“false”` for the comparison.\n" + }, + "right": { + "description": "The right operand of the condition task. Can be either a string value or a job state or parameter reference." + } + } + }, "dbt_task": { "description": "If dbt_task, indicates that this must execute a dbt task. It requires both Databricks SQL and the ability to use a serverless or a pro SQL warehouse.", "properties": { @@ -487,12 +659,15 @@ } }, "depends_on": { - "description": "", + "description": "An optional array of objects specifying the dependency graph of the task. All tasks specified in this field must complete successfully before executing this task.\nThe key is `task_key`, and the value is the name assigned to the dependent task.\n", "items": { "description": "", "properties": { + "outcome": { + "description": "Can only be specified on condition task dependencies. The outcome of the dependent task that must be met for this task to run." + }, "task_key": { - "description": "" + "description": "The name of the task this task depends on." } } } @@ -501,13 +676,10 @@ "description": "An optional description for this task.\nThe maximum length is 4096 bytes." }, "email_notifications": { - "description": "An optional set of email addresses that is notified when runs of this job begin or complete as well as when this job is deleted. The default behavior is to not send any emails.", + "description": "An optional set of email addresses that is notified when runs of this task begin or complete as well as when this task is deleted. The default behavior is to not send any emails.", "properties": { - "no_alert_for_skipped_runs": { - "description": "If true, do not send email to recipients specified in `on_failure` if the run is skipped." - }, "on_failure": { - "description": "A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `SKIPPED`, `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent.", + "description": "A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent.", "items": { "description": "" } @@ -519,7 +691,7 @@ } }, "on_success": { - "description": "A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESSFUL` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", + "description": "A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. 
If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", "items": { "description": "" } @@ -595,7 +767,7 @@ "description": "An optional minimal interval in milliseconds between the start of the failed run and the subsequent retry run. The default behavior is that unsuccessful runs are immediately retried." }, "new_cluster": { - "description": "If new_cluster, a description of a cluster that is created for each task.", + "description": "If new_cluster, a description of a cluster that is created for only for this task.", "properties": { "autoscale": { "description": "Parameters needed in order to automatically scale clusters up and down based on load.\nNote: autoscaling works best with DB runtime versions 3.0 or later.", @@ -683,7 +855,7 @@ } }, "s3": { - "description": "destination and either region or endpoint should also be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", + "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", "properties": { "canned_acl": { "description": "(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." @@ -722,6 +894,28 @@ "description": "" } }, + "data_security_mode": { + "description": "" + }, + "docker_image": { + "description": "", + "properties": { + "basic_auth": { + "description": "", + "properties": { + "password": { + "description": "Password of the user" + }, + "username": { + "description": "Name of the user" + } + } + }, + "url": { + "description": "URL of the docker image." + } + } + }, "driver_instance_pool_id": { "description": "The optional ID of the instance pool for the driver of the cluster belongs.\nThe pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not\nassigned." }, @@ -745,6 +939,59 @@ }, "google_service_account": { "description": "If provided, the cluster will impersonate the google service account when accessing\ngcloud services (like GCS). The google service account\nmust have previously been added to the Databricks environment by an account\nadministrator." + }, + "local_ssd_count": { + "description": "If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type." + } + } + }, + "init_scripts": { + "description": "The configuration for storing init scripts. 
Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `\u003cdestination\u003e/\u003ccluster-ID\u003e/init_scripts`.", + "items": { + "description": "", + "properties": { + "dbfs": { + "description": "destination needs to be provided. e.g.\n`{ \"dbfs\" : { \"destination\" : \"dbfs:/home/cluster_log\" } }`", + "properties": { + "destination": { + "description": "dbfs destination, e.g. `dbfs:/my/path`" + } + } + }, + "s3": { + "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", + "properties": { + "canned_acl": { + "description": "(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." + }, + "destination": { + "description": "S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using\ncluster iam role, please make sure you set cluster iam role and the role has write access to the\ndestination. Please also note that you cannot use AWS keys to deliver logs." + }, + "enable_encryption": { + "description": "(Optional) Flag to enable server side encryption, `false` by default." + }, + "encryption_type": { + "description": "(Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when\nencryption is enabled and the default type is `sse-s3`." + }, + "endpoint": { + "description": "S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set.\nIf both are set, endpoint will be used." + }, + "kms_key": { + "description": "(Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`." + }, + "region": { + "description": "S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set,\nendpoint will be used." + } + } + }, + "workspace": { + "description": "destination needs to be provided. e.g.\n`{ \"workspace\" : { \"destination\" : \"/Users/user1@databricks.com/my-init.sh\" } }`", + "properties": { + "destination": { + "description": "workspace files destination, e.g. 
`/Users/user1@databricks.com/my-init.sh`" + } + } + } } } }, @@ -763,6 +1010,9 @@ "runtime_engine": { "description": "" }, + "single_user_name": { + "description": "Single user name if data_security_mode is `SINGLE_USER`" + }, "spark_conf": { "description": "An object containing a set of optional, user-specified Spark configuration key-value pairs.\nUsers can also pass in a string of extra JVM options to the driver and the executors via\n`spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively.\n", "additionalproperties": { @@ -815,7 +1065,21 @@ "description": "The path of the notebook to be run in the Databricks workspace or remote repository.\nFor notebooks stored in the Databricks workspace, the path must be absolute and begin with a slash.\nFor notebooks stored in a remote repository, the path must be relative. This field is required.\n" }, "source": { - "description": "This describes an enum" + "description": "Optional location type of the Python file. When set to `WORKSPACE` or not specified, the file will be retrieved\nfrom the local \u003cDatabricks\u003e workspace or cloud location (if the `python_file` has a URI format). When set to `GIT`,\nthe Python file will be retrieved from a Git repository defined in `git_source`.\n\n* `WORKSPACE`: The Python file is located in a \u003cDatabricks\u003e workspace or at a cloud filesystem URI.\n* `GIT`: The Python file is located in a remote Git repository.\n" + } + } + }, + "notification_settings": { + "description": "Optional notification settings that are used when sending notifications to each of the `email_notifications` for this task.", + "properties": { + "alert_on_last_attempt": { + "description": "If true, do not send notifications to recipients specified in `on_start` for the retried runs and do not send notifications to recipients specified in `on_failure` until the last retry of the run." + }, + "no_alert_for_canceled_runs": { + "description": "If true, do not send notifications to recipients specified in `on_failure` if the run is canceled." + }, + "no_alert_for_skipped_runs": { + "description": "If true, do not send notifications to recipients specified in `on_failure` if the run is skipped." } } }, @@ -856,11 +1120,14 @@ "retry_on_timeout": { "description": "An optional policy to specify whether to retry a task when it times out. The default behavior is to not retry on timeout." }, + "run_if": { + "description": "An optional value specifying the condition determining whether the task is run once its dependencies have been completed. When omitted, defaults to `ALL_SUCCESS`.\n\n* `ALL_SUCCESS`: All dependencies have executed and succeeded\n* `AT_LEAST_ONE_SUCCESS`: At least one dependency has succeeded\n* `NONE_FAILED`: None of the dependencies have failed and at least one was executed\n* `ALL_DONE`: All dependencies completed and at least one was executed\n* `AT_LEAST_ONE_FAILED`: At least one dependency failed\n* `ALL_FAILED`: ALl dependencies have failed\n" + }, "spark_jar_task": { "description": "If spark_jar_task, indicates that this task must run a JAR.", "properties": { "jar_uri": { - "description": "Deprecated since 04/2016\\\\. Provide a `jar` through the `libraries` field instead. For an example, see :method:jobs/create.\n" + "description": "Deprecated since 04/2016. Provide a `jar` through the `libraries` field instead. For an example, see :method:jobs/create.\n" }, "main_class_name": { "description": "The full name of the class containing the main method to be executed. 
This class must be contained in a JAR provided as a library.\n\nThe code must use `SparkContext.getOrCreate` to obtain a Spark context; otherwise, runs of the job fail." @@ -883,7 +1150,10 @@ } }, "python_file": { - "description": "" + "description": "The Python file to be executed. Cloud file URIs (such as dbfs:/, s3:/, adls:/, gcs:/) and workspace paths are supported. For python files stored in the Databricks workspace, the path must be absolute and begin with `/`. For files stored in a remote repository, the path must be relative. This field is required." + }, + "source": { + "description": "Optional location type of the Python file. When set to `WORKSPACE` or not specified, the file will be retrieved\nfrom the local \u003cDatabricks\u003e workspace or cloud location (if the `python_file` has a URI format). When set to `GIT`,\nthe Python file will be retrieved from a Git repository defined in `git_source`.\n\n* `WORKSPACE`: The Python file is located in a \u003cDatabricks\u003e workspace or at a cloud filesystem URI.\n* `GIT`: The Python file is located in a remote Git repository.\n" } } }, @@ -954,6 +1224,14 @@ } } }, + "file": { + "description": "If file, indicates that this job runs a SQL file in a remote Git repository. Only one SQL statement is supported in a file. Multiple SQL statements separated by semicolons (;) are not permitted.", + "properties": { + "path": { + "description": "Relative path of the SQL file in the remote Git repository." + } + } + }, "parameters": { "description": "Parameters to be used for each run of this job. The SQL alert task does not support custom parameters.", "additionalproperties": { @@ -986,24 +1264,24 @@ "description": "An optional timeout applied to each run of this job. The default behavior is to have no timeout." }, "trigger": { - "description": "", + "description": "Trigger settings for the job. Can be used to trigger a run when new files arrive in an external location. The default behavior is that the job runs only when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`.", "properties": { "file_arrival": { - "description": "", + "description": "File arrival trigger settings.", "properties": { - "min_time_between_trigger_seconds": { - "description": "" + "min_time_between_triggers_seconds": { + "description": "If set, the trigger starts a run only after the specified amount of time passed since\nthe last time the trigger fired. The minimum allowed value is 60 seconds\n" }, "url": { - "description": "" + "description": "URL to be monitored for file arrivals. The path must point to the root or a subpath of the external location." }, "wait_after_last_change_seconds": { - "description": "" + "description": "If set, the trigger starts a run only after no file activity has occurred for the specified amount of time.\nThis makes it possible to wait for a batch of incoming files to arrive before triggering a run. The\nminimum allowed value is 60 seconds.\n" } } }, "pause_status": { - "description": "" + "description": "Whether this trigger is paused or not." } } }, @@ -1054,74 +1332,74 @@ "description": "", "properties": { "creation_timestamp": { - "description": "" + "description": "Timestamp recorded when this `registered_model` was created." }, "description": { - "description": "" + "description": "Description of this `registered_model`." }, "last_updated_timestamp": { - "description": "" + "description": "Timestamp recorded when metadata for this `registered_model` was last updated." 
}, "latest_versions": { - "description": "", + "description": "Collection of latest model versions for each stage.\nOnly contains models with current `READY` status.", "items": { "description": "", "properties": { "creation_timestamp": { - "description": "" + "description": "Timestamp recorded when this `model_version` was created." }, "current_stage": { - "description": "" + "description": "Current stage for this `model_version`." }, "description": { - "description": "" + "description": "Description of this `model_version`." }, "last_updated_timestamp": { - "description": "" + "description": "Timestamp recorded when metadata for this `model_version` was last updated." }, "name": { - "description": "" + "description": "Unique name of the model" }, "run_id": { - "description": "" + "description": "MLflow run ID used when creating `model_version`, if `source` was generated by an\nexperiment run stored in MLflow tracking server." }, "run_link": { - "description": "" + "description": "Run Link: Direct link to the run that generated this version" }, "source": { - "description": "" + "description": "URI indicating the location of the source model artifacts, used when creating `model_version`" }, "status": { - "description": "" + "description": "Current status of `model_version`" }, "status_message": { - "description": "" + "description": "Details on current `status`, if it is pending or failed." }, "tags": { - "description": "", + "description": "Tags: Additional metadata key-value pairs for this `model_version`.", "items": { "description": "", "properties": { "key": { - "description": "" + "description": "The tag key." }, "value": { - "description": "" + "description": "The tag value." } } } }, "user_id": { - "description": "" + "description": "User that created this `model_version`." }, "version": { - "description": "" + "description": "Model's version number." } } } }, "name": { - "description": "" + "description": "Unique name for the model." }, "permissions": { "description": "", @@ -1144,32 +1422,32 @@ } }, "tags": { - "description": "", + "description": "Tags: Additional metadata key-value pairs for this `registered_model`.", "items": { "description": "", "properties": { "key": { - "description": "" + "description": "The tag key." }, "value": { - "description": "" + "description": "The tag value." } } } }, "user_id": { - "description": "" + "description": "User that created this `registered_model`" } } } }, "pipelines": { - "description": "List of pipeline definations", + "description": "List of DLT pipelines", "additionalproperties": { "description": "", "properties": { "catalog": { - "description": "Catalog in UC to add tables to. If target is specified, tables in this pipeline will be\npublished to a \"target\" schema inside catalog (i.e. \u003ccatalog\u003e.\u003ctarget\u003e.\u003ctable\u003e)." + "description": "A catalog in Unity Catalog to publish data from this pipeline to. If `target` is specified, tables in this pipeline are published to a `target` schema inside `catalog` (for example, `catalog`.`target`.`table`). If `target` is not specified, no data is published to Unity Catalog." }, "channel": { "description": "DLT Release Channel that specifies which version to use." 
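The job-level additions in the hunk above introduce several fields that interact (a file-arrival trigger, `run_as`, job `parameters`, condition tasks, `run_if`, and `depends_on.outcome`). As a reading aid, here is a minimal, hypothetical sketch of how those keys compose into one job resource. It only arranges field names that appear verbatim in the descriptions above; every concrete value (bucket URL, email address, task keys) is invented for illustration and is not part of this patch.

    # Illustrative only: a plain dict mirroring the job fields documented above.
    # Field names come from the schema descriptions; all values are placeholders.
    import json

    job = {
        "name": "example-job",
        "run_as": {
            # Either user_name or service_principal_name, not both.
            "user_name": "user1@databricks.com",
        },
        "parameters": [
            {"name": "table", "default": "events"},
        ],
        "trigger": {
            "file_arrival": {
                # The minimum allowed value per the description above is 60 seconds.
                "url": "s3://my-bucket/landing/",
                "min_time_between_triggers_seconds": 60,
            },
        },
        "tasks": [
            {
                "task_key": "check_threshold",
                "condition_task": {
                    # GREATER_THAN_OR_EQUAL compares operands numerically,
                    # so "12.0" >= "12" evaluates to true.
                    "left": "12.0",
                    "op": "GREATER_THAN_OR_EQUAL",
                    "right": "12",
                },
            },
            {
                "task_key": "ingest",
                "run_if": "ALL_SUCCESS",
                "depends_on": [
                    # `outcome` is only valid on condition-task dependencies.
                    {"task_key": "check_threshold", "outcome": "true"},
                ],
            },
        ],
    }

    print(json.dumps(job, indent=2))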
@@ -1229,7 +1507,7 @@ } }, "azure_attributes": { - "description": "Attributes related to clusters running on Amazon Web Services.\nIf not specified at cluster creation, a set of default values will be used.", + "description": "Attributes related to clusters running on Microsoft Azure.\nIf not specified at cluster creation, a set of default values will be used.", "properties": { "availability": { "description": "" @@ -1254,7 +1532,7 @@ } }, "cluster_log_conf": { - "description": "The configuration for delivering spark logs to a long-term storage destination.\nTwo kinds of destinations (dbfs and s3) are supported. Only one destination can be specified\nfor one cluster. If the conf is given, the logs will be delivered to the destination every\n`5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while\nthe destination of executor logs is `$destination/$clusterId/executor`.", + "description": "The configuration for delivering spark logs to a long-term storage destination.\nOnly dbfs destinations are supported. Only one destination can be specified\nfor one cluster. If the conf is given, the logs will be delivered to the destination every\n`5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while\nthe destination of executor logs is `$destination/$clusterId/executor`.\n", "properties": { "dbfs": { "description": "destination needs to be provided. e.g.\n`{ \"dbfs\" : { \"destination\" : \"dbfs:/home/cluster_log\" } }`", @@ -1265,7 +1543,7 @@ } }, "s3": { - "description": "destination and either region or endpoint should also be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", + "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", "properties": { "canned_acl": { "description": "(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." @@ -1315,6 +1593,9 @@ }, "google_service_account": { "description": "If provided, the cluster will impersonate the google service account when accessing\ngcloud services (like GCS). The google service account\nmust have previously been added to the Databricks environment by an account\nadministrator." + }, + "local_ssd_count": { + "description": "If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type." 
} } }, @@ -1322,7 +1603,7 @@ "description": "The optional ID of the instance pool to which the cluster belongs." }, "label": { - "description": "Cluster label" + "description": "A label for the cluster specification, either `default` to configure the default cluster, or `maintenance` to configure the maintenance cluster. This field is optional. The default value is `default`." }, "node_type_id": { "description": "This field encodes, through a single value, the resources available to each of\nthe Spark nodes in this cluster. For example, the Spark nodes can be provisioned\nand optimized for memory or compute intensive workloads. A list of available node\ntypes can be retrieved by using the :method:clusters/listNodeTypes API call.\n" @@ -1395,18 +1676,18 @@ "description": "", "properties": { "file": { - "description": "", + "description": "The path to a file that defines a pipeline and is stored in the Databricks Repos.\n", "properties": { "path": { - "description": "" + "description": "The absolute path of the file." } } }, "jar": { - "description": "URI of the jar to be installed. Currently only DBFS and S3 URIs are supported.\nFor example: `{ \"jar\": \"dbfs:/mnt/databricks/library.jar\" }` or\n`{ \"jar\": \"s3://my-bucket/library.jar\" }`.\nIf S3 is used, please make sure the cluster has read access on the library. You may need to\nlaunch the cluster with an IAM role to access the S3 URI." + "description": "URI of the jar to be installed. Currently only DBFS is supported.\n" }, "maven": { - "description": "Specification of a maven library to be installed. For example:\n`{ \"coordinates\": \"org.jsoup:jsoup:1.7.2\" }`", + "description": "Specification of a maven library to be installed.\n", "properties": { "coordinates": { "description": "Gradle-style maven coordinates. For example: \"org.jsoup:jsoup:1.7.2\"." @@ -1423,7 +1704,7 @@ } }, "notebook": { - "description": "The path to a notebook that defines a pipeline and is stored in the Databricks workspace.\nFor example: `{ \"notebook\" : { \"path\" : \"/my-pipeline-notebook-path\" } }`.\nCurrently, only Scala notebooks are supported, and pipelines must be defined in a package\ncell.", + "description": "The path to a notebook that defines a pipeline and is stored in the \u003cDatabricks\u003e workspace.\n", "properties": { "path": { "description": "The absolute path of the notebook." @@ -1431,7 +1712,7 @@ } }, "whl": { - "description": "URI of the wheel to be installed.\nFor example: `{ \"whl\": \"dbfs:/my/whl\" }` or `{ \"whl\": \"s3://my-bucket/whl\" }`.\nIf S3 is used, please make sure the cluster has read access on the library. You may need to\nlaunch the cluster with an IAM role to access the S3 URI." + "description": "URI of the wheel to be installed.\n" } } } @@ -1462,11 +1743,14 @@ "photon": { "description": "Whether Photon is enabled for this pipeline." }, + "serverless": { + "description": "Whether serverless compute is enabled for this pipeline." + }, "storage": { "description": "DBFS root directory for storing checkpoints and tables." }, "target": { - "description": "Target schema (database) to add tables in this pipeline to." + "description": "Target schema (database) to add tables in this pipeline to. If not specified, no data is published to the Hive metastore or Unity Catalog. To publish to Unity Catalog, also specify `catalog`." }, "trigger": { "description": "Which pipeline trigger to use. 
Deprecated: Use `continuous` instead.", @@ -1492,6 +1776,12 @@ } } }, + "variables": { + "description": "", + "additionalproperties": { + "description": "" + } + }, "workspace": { "description": "Configures which workspace to connect to and locations for files, state, and similar locations within the workspace file tree.", "properties": { @@ -1554,22 +1844,22 @@ "description": "", "properties": { "artifact_location": { - "description": "" + "description": "Location where artifacts for the experiment are stored." }, "creation_time": { - "description": "" + "description": "Creation time" }, "experiment_id": { - "description": "" + "description": "Unique identifier for the experiment." }, "last_update_time": { - "description": "" + "description": "Last update time" }, "lifecycle_stage": { - "description": "" + "description": "Current life cycle stage of the experiment: \"active\" or \"deleted\".\nDeleted experiments are not returned by APIs." }, "name": { - "description": "" + "description": "Human readable name that identifies the experiment." }, "permissions": { "description": "", @@ -1592,15 +1882,15 @@ } }, "tags": { - "description": "", + "description": "Tags: Additional metadata key-value pairs.", "items": { "description": "", "properties": { "key": { - "description": "" + "description": "The tag key." }, "value": { - "description": "" + "description": "The tag value." } } } @@ -1609,15 +1899,34 @@ } }, "jobs": { - "description": "List of job definations", + "description": "List of Databricks jobs", "additionalproperties": { "description": "", "properties": { + "compute": { + "description": "A list of compute requirements that can be referenced by tasks of this job.", + "items": { + "description": "", + "properties": { + "compute_key": { + "description": "A unique name for the compute requirement. This field is required and must be unique within the job.\n`JobTaskSettings` may refer to this field to determine the compute requirements for the task execution." + }, + "spec": { + "description": "", + "properties": { + "kind": { + "description": "The kind of compute described by this compute specification." + } + } + } + } + } + }, "continuous": { - "description": "", + "description": "An optional continuous property for this job. The continuous property will ensure that there is always one run executing. Only one of `schedule` and `continuous` can be used.", "properties": { "pause_status": { - "description": "" + "description": "Whether this trigger is paused or not." } } }, @@ -1628,7 +1937,7 @@ "description": "If true, do not send email to recipients specified in `on_failure` if the run is skipped." }, "on_failure": { - "description": "A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `SKIPPED`, `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent.", + "description": "A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED`, or `TIMED_OUT` result_state. 
If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent.", "items": { "description": "" } @@ -1640,7 +1949,7 @@ } }, "on_success": { - "description": "A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESSFUL` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", + "description": "A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", "items": { "description": "" } @@ -1687,7 +1996,7 @@ "description": "A unique name for the job cluster. This field is required and must be unique within the job.\n`JobTaskSettings` may refer to this field to determine which cluster to launch for the task execution." }, "new_cluster": { - "description": "If new_cluster, a description of a cluster that is created for each task.", + "description": "If new_cluster, a description of a cluster that is created for only for this task.", "properties": { "autoscale": { "description": "Parameters needed in order to automatically scale clusters up and down based on load.\nNote: autoscaling works best with DB runtime versions 3.0 or later.", @@ -1775,7 +2084,7 @@ } }, "s3": { - "description": "destination and either region or endpoint should also be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", + "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", "properties": { "canned_acl": { "description": "(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." @@ -1814,6 +2123,28 @@ "description": "" } }, + "data_security_mode": { + "description": "" + }, + "docker_image": { + "description": "", + "properties": { + "basic_auth": { + "description": "", + "properties": { + "password": { + "description": "Password of the user" + }, + "username": { + "description": "Name of the user" + } + } + }, + "url": { + "description": "URL of the docker image." + } + } + }, "driver_instance_pool_id": { "description": "The optional ID of the instance pool for the driver of the cluster belongs.\nThe pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not\nassigned." 
}, @@ -1837,6 +2168,59 @@ }, "google_service_account": { "description": "If provided, the cluster will impersonate the google service account when accessing\ngcloud services (like GCS). The google service account\nmust have previously been added to the Databricks environment by an account\nadministrator." + }, + "local_ssd_count": { + "description": "If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type." + } + } + }, + "init_scripts": { + "description": "The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `\u003cdestination\u003e/\u003ccluster-ID\u003e/init_scripts`.", + "items": { + "description": "", + "properties": { + "dbfs": { + "description": "destination needs to be provided. e.g.\n`{ \"dbfs\" : { \"destination\" : \"dbfs:/home/cluster_log\" } }`", + "properties": { + "destination": { + "description": "dbfs destination, e.g. `dbfs:/my/path`" + } + } + }, + "s3": { + "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", + "properties": { + "canned_acl": { + "description": "(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." + }, + "destination": { + "description": "S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using\ncluster iam role, please make sure you set cluster iam role and the role has write access to the\ndestination. Please also note that you cannot use AWS keys to deliver logs." + }, + "enable_encryption": { + "description": "(Optional) Flag to enable server side encryption, `false` by default." + }, + "encryption_type": { + "description": "(Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when\nencryption is enabled and the default type is `sse-s3`." + }, + "endpoint": { + "description": "S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set.\nIf both are set, endpoint will be used." + }, + "kms_key": { + "description": "(Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`." + }, + "region": { + "description": "S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set,\nendpoint will be used." + } + } + }, + "workspace": { + "description": "destination needs to be provided. 
e.g.\n`{ \"workspace\" : { \"destination\" : \"/Users/user1@databricks.com/my-init.sh\" } }`", + "properties": { + "destination": { + "description": "workspace files destination, e.g. `/Users/user1@databricks.com/my-init.sh`" + } + } + } } } }, @@ -1855,6 +2239,9 @@ "runtime_engine": { "description": "" }, + "single_user_name": { + "description": "Single user name if data_security_mode is `SINGLE_USER`" + }, "spark_conf": { "description": "An object containing a set of optional, user-specified Spark configuration key-value pairs.\nUsers can also pass in a string of extra JVM options to the driver and the executors via\n`spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively.\n", "additionalproperties": { @@ -1903,6 +2290,31 @@ "name": { "description": "An optional name for the job." }, + "notification_settings": { + "description": "Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this job.", + "properties": { + "no_alert_for_canceled_runs": { + "description": "If true, do not send notifications to recipients specified in `on_failure` if the run is canceled." + }, + "no_alert_for_skipped_runs": { + "description": "If true, do not send notifications to recipients specified in `on_failure` if the run is skipped." + } + } + }, + "parameters": { + "description": "Job-level parameter definitions", + "items": { + "description": "", + "properties": { + "default": { + "description": "Default value of the parameter." + }, + "name": { + "description": "The name of the defined parameter. May only contain alphanumeric characters, `_`, `-`, and `.`" + } + } + } + }, "permissions": { "description": "", "items": { @@ -1923,11 +2335,22 @@ } } }, + "run_as": { + "description": "", + "properties": { + "service_principal_name": { + "description": "Application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role." + }, + "user_name": { + "description": "The email of an active workspace user. Non-admin users can only set this field to their own email." + } + } + }, "schedule": { "description": "An optional periodic schedule for this job. The default behavior is that the job only runs when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`.", "properties": { "pause_status": { - "description": "Indicate whether this schedule is paused or not." + "description": "Whether this trigger is paused or not." }, "quartz_cron_expression": { "description": "A Cron expression using Quartz syntax that describes the schedule for a job.\nSee [Cron Trigger](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html)\nfor details. This field is required.\"\n" @@ -1948,6 +2371,23 @@ "items": { "description": "", "properties": { + "compute_key": { + "description": "The key of the compute requirement, specified in `job.settings.compute`, to use for execution of this task." + }, + "condition_task": { + "description": "If condition_task, specifies a condition with an outcome that can be used to control the execution of other tasks. Does not require a cluster to execute and does not support retries or notifications.", + "properties": { + "left": { + "description": "The left operand of the condition task. Can be either a string value or a job state or parameter reference." + }, + "op": { + "description": "* `EQUAL_TO`, `NOT_EQUAL` operators perform string comparison of their operands. 
This means that `“12.0” == “12”` will evaluate to `false`.\n* `GREATER_THAN`, `GREATER_THAN_OR_EQUAL`, `LESS_THAN`, `LESS_THAN_OR_EQUAL` operators perform numeric comparison of their operands. `“12.0” \u003e= “12”` will evaluate to `true`, `“10.0” \u003e= “12”` will evaluate to `false`.\n\nThe boolean comparison to task values can be implemented with operators `EQUAL_TO`, `NOT_EQUAL`. If a task value was set to a boolean value, it will be serialized to `“true”` or `“false”` for the comparison.\n" + }, + "right": { + "description": "The right operand of the condition task. Can be either a string value or a job state or parameter reference." + } + } + }, "dbt_task": { "description": "If dbt_task, indicates that this must execute a dbt task. It requires both Databricks SQL and the ability to use a serverless or a pro SQL warehouse.", "properties": { @@ -1975,12 +2415,15 @@ } }, "depends_on": { - "description": "", + "description": "An optional array of objects specifying the dependency graph of the task. All tasks specified in this field must complete successfully before executing this task.\nThe key is `task_key`, and the value is the name assigned to the dependent task.\n", "items": { "description": "", "properties": { + "outcome": { + "description": "Can only be specified on condition task dependencies. The outcome of the dependent task that must be met for this task to run." + }, "task_key": { - "description": "" + "description": "The name of the task this task depends on." } } } @@ -1989,13 +2432,10 @@ "description": "An optional description for this task.\nThe maximum length is 4096 bytes." }, "email_notifications": { - "description": "An optional set of email addresses that is notified when runs of this job begin or complete as well as when this job is deleted. The default behavior is to not send any emails.", + "description": "An optional set of email addresses that is notified when runs of this task begin or complete as well as when this task is deleted. The default behavior is to not send any emails.", "properties": { - "no_alert_for_skipped_runs": { - "description": "If true, do not send email to recipients specified in `on_failure` if the run is skipped." - }, "on_failure": { - "description": "A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `SKIPPED`, `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent.", + "description": "A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent.", "items": { "description": "" } @@ -2007,7 +2447,7 @@ } }, "on_success": { - "description": "A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESSFUL` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", + "description": "A list of email addresses to be notified when a run successfully completes. 
A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", "items": { "description": "" } @@ -2083,7 +2523,7 @@ "description": "An optional minimal interval in milliseconds between the start of the failed run and the subsequent retry run. The default behavior is that unsuccessful runs are immediately retried." }, "new_cluster": { - "description": "If new_cluster, a description of a cluster that is created for each task.", + "description": "If new_cluster, a description of a cluster that is created for only for this task.", "properties": { "autoscale": { "description": "Parameters needed in order to automatically scale clusters up and down based on load.\nNote: autoscaling works best with DB runtime versions 3.0 or later.", @@ -2171,7 +2611,7 @@ } }, "s3": { - "description": "destination and either region or endpoint should also be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", + "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", "properties": { "canned_acl": { "description": "(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." @@ -2210,6 +2650,28 @@ "description": "" } }, + "data_security_mode": { + "description": "" + }, + "docker_image": { + "description": "", + "properties": { + "basic_auth": { + "description": "", + "properties": { + "password": { + "description": "Password of the user" + }, + "username": { + "description": "Name of the user" + } + } + }, + "url": { + "description": "URL of the docker image." + } + } + }, "driver_instance_pool_id": { "description": "The optional ID of the instance pool for the driver of the cluster belongs.\nThe pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not\nassigned." }, @@ -2233,6 +2695,59 @@ }, "google_service_account": { "description": "If provided, the cluster will impersonate the google service account when accessing\ngcloud services (like GCS). The google service account\nmust have previously been added to the Databricks environment by an account\nadministrator." + }, + "local_ssd_count": { + "description": "If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. 
Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type." + } + } + }, + "init_scripts": { + "description": "The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `\u003cdestination\u003e/\u003ccluster-ID\u003e/init_scripts`.", + "items": { + "description": "", + "properties": { + "dbfs": { + "description": "destination needs to be provided. e.g.\n`{ \"dbfs\" : { \"destination\" : \"dbfs:/home/cluster_log\" } }`", + "properties": { + "destination": { + "description": "dbfs destination, e.g. `dbfs:/my/path`" + } + } + }, + "s3": { + "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", + "properties": { + "canned_acl": { + "description": "(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." + }, + "destination": { + "description": "S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using\ncluster iam role, please make sure you set cluster iam role and the role has write access to the\ndestination. Please also note that you cannot use AWS keys to deliver logs." + }, + "enable_encryption": { + "description": "(Optional) Flag to enable server side encryption, `false` by default." + }, + "encryption_type": { + "description": "(Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when\nencryption is enabled and the default type is `sse-s3`." + }, + "endpoint": { + "description": "S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set.\nIf both are set, endpoint will be used." + }, + "kms_key": { + "description": "(Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`." + }, + "region": { + "description": "S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set,\nendpoint will be used." + } + } + }, + "workspace": { + "description": "destination needs to be provided. e.g.\n`{ \"workspace\" : { \"destination\" : \"/Users/user1@databricks.com/my-init.sh\" } }`", + "properties": { + "destination": { + "description": "workspace files destination, e.g. 
`/Users/user1@databricks.com/my-init.sh`" + } + } + } } } }, @@ -2251,6 +2766,9 @@ "runtime_engine": { "description": "" }, + "single_user_name": { + "description": "Single user name if data_security_mode is `SINGLE_USER`" + }, "spark_conf": { "description": "An object containing a set of optional, user-specified Spark configuration key-value pairs.\nUsers can also pass in a string of extra JVM options to the driver and the executors via\n`spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively.\n", "additionalproperties": { @@ -2303,7 +2821,21 @@ "description": "The path of the notebook to be run in the Databricks workspace or remote repository.\nFor notebooks stored in the Databricks workspace, the path must be absolute and begin with a slash.\nFor notebooks stored in a remote repository, the path must be relative. This field is required.\n" }, "source": { - "description": "This describes an enum" + "description": "Optional location type of the Python file. When set to `WORKSPACE` or not specified, the file will be retrieved\nfrom the local \u003cDatabricks\u003e workspace or cloud location (if the `python_file` has a URI format). When set to `GIT`,\nthe Python file will be retrieved from a Git repository defined in `git_source`.\n\n* `WORKSPACE`: The Python file is located in a \u003cDatabricks\u003e workspace or at a cloud filesystem URI.\n* `GIT`: The Python file is located in a remote Git repository.\n" + } + } + }, + "notification_settings": { + "description": "Optional notification settings that are used when sending notifications to each of the `email_notifications` for this task.", + "properties": { + "alert_on_last_attempt": { + "description": "If true, do not send notifications to recipients specified in `on_start` for the retried runs and do not send notifications to recipients specified in `on_failure` until the last retry of the run." + }, + "no_alert_for_canceled_runs": { + "description": "If true, do not send notifications to recipients specified in `on_failure` if the run is canceled." + }, + "no_alert_for_skipped_runs": { + "description": "If true, do not send notifications to recipients specified in `on_failure` if the run is skipped." } } }, @@ -2344,11 +2876,14 @@ "retry_on_timeout": { "description": "An optional policy to specify whether to retry a task when it times out. The default behavior is to not retry on timeout." }, + "run_if": { + "description": "An optional value specifying the condition determining whether the task is run once its dependencies have been completed. When omitted, defaults to `ALL_SUCCESS`.\n\n* `ALL_SUCCESS`: All dependencies have executed and succeeded\n* `AT_LEAST_ONE_SUCCESS`: At least one dependency has succeeded\n* `NONE_FAILED`: None of the dependencies have failed and at least one was executed\n* `ALL_DONE`: All dependencies completed and at least one was executed\n* `AT_LEAST_ONE_FAILED`: At least one dependency failed\n* `ALL_FAILED`: ALl dependencies have failed\n" + }, "spark_jar_task": { "description": "If spark_jar_task, indicates that this task must run a JAR.", "properties": { "jar_uri": { - "description": "Deprecated since 04/2016\\\\. Provide a `jar` through the `libraries` field instead. For an example, see :method:jobs/create.\n" + "description": "Deprecated since 04/2016. Provide a `jar` through the `libraries` field instead. For an example, see :method:jobs/create.\n" }, "main_class_name": { "description": "The full name of the class containing the main method to be executed. 
This class must be contained in a JAR provided as a library.\n\nThe code must use `SparkContext.getOrCreate` to obtain a Spark context; otherwise, runs of the job fail." @@ -2371,7 +2906,10 @@ } }, "python_file": { - "description": "" + "description": "The Python file to be executed. Cloud file URIs (such as dbfs:/, s3:/, adls:/, gcs:/) and workspace paths are supported. For python files stored in the Databricks workspace, the path must be absolute and begin with `/`. For files stored in a remote repository, the path must be relative. This field is required." + }, + "source": { + "description": "Optional location type of the Python file. When set to `WORKSPACE` or not specified, the file will be retrieved\nfrom the local \u003cDatabricks\u003e workspace or cloud location (if the `python_file` has a URI format). When set to `GIT`,\nthe Python file will be retrieved from a Git repository defined in `git_source`.\n\n* `WORKSPACE`: The Python file is located in a \u003cDatabricks\u003e workspace or at a cloud filesystem URI.\n* `GIT`: The Python file is located in a remote Git repository.\n" } } }, @@ -2442,6 +2980,14 @@ } } }, + "file": { + "description": "If file, indicates that this job runs a SQL file in a remote Git repository. Only one SQL statement is supported in a file. Multiple SQL statements separated by semicolons (;) are not permitted.", + "properties": { + "path": { + "description": "Relative path of the SQL file in the remote Git repository." + } + } + }, "parameters": { "description": "Parameters to be used for each run of this job. The SQL alert task does not support custom parameters.", "additionalproperties": { @@ -2474,24 +3020,24 @@ "description": "An optional timeout applied to each run of this job. The default behavior is to have no timeout." }, "trigger": { - "description": "", + "description": "Trigger settings for the job. Can be used to trigger a run when new files arrive in an external location. The default behavior is that the job runs only when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`.", "properties": { "file_arrival": { - "description": "", + "description": "File arrival trigger settings.", "properties": { - "min_time_between_trigger_seconds": { - "description": "" + "min_time_between_triggers_seconds": { + "description": "If set, the trigger starts a run only after the specified amount of time passed since\nthe last time the trigger fired. The minimum allowed value is 60 seconds\n" }, "url": { - "description": "" + "description": "URL to be monitored for file arrivals. The path must point to the root or a subpath of the external location." }, "wait_after_last_change_seconds": { - "description": "" + "description": "If set, the trigger starts a run only after no file activity has occurred for the specified amount of time.\nThis makes it possible to wait for a batch of incoming files to arrive before triggering a run. The\nminimum allowed value is 60 seconds.\n" } } }, "pause_status": { - "description": "" + "description": "Whether this trigger is paused or not." } } }, @@ -2542,74 +3088,74 @@ "description": "", "properties": { "creation_timestamp": { - "description": "" + "description": "Timestamp recorded when this `registered_model` was created." }, "description": { - "description": "" + "description": "Description of this `registered_model`." }, "last_updated_timestamp": { - "description": "" + "description": "Timestamp recorded when metadata for this `registered_model` was last updated." 
}, "latest_versions": { - "description": "", + "description": "Collection of latest model versions for each stage.\nOnly contains models with current `READY` status.", "items": { "description": "", "properties": { "creation_timestamp": { - "description": "" + "description": "Timestamp recorded when this `model_version` was created." }, "current_stage": { - "description": "" + "description": "Current stage for this `model_version`." }, "description": { - "description": "" + "description": "Description of this `model_version`." }, "last_updated_timestamp": { - "description": "" + "description": "Timestamp recorded when metadata for this `model_version` was last updated." }, "name": { - "description": "" + "description": "Unique name of the model" }, "run_id": { - "description": "" + "description": "MLflow run ID used when creating `model_version`, if `source` was generated by an\nexperiment run stored in MLflow tracking server." }, "run_link": { - "description": "" + "description": "Run Link: Direct link to the run that generated this version" }, "source": { - "description": "" + "description": "URI indicating the location of the source model artifacts, used when creating `model_version`" }, "status": { - "description": "" + "description": "Current status of `model_version`" }, "status_message": { - "description": "" + "description": "Details on current `status`, if it is pending or failed." }, "tags": { - "description": "", + "description": "Tags: Additional metadata key-value pairs for this `model_version`.", "items": { "description": "", "properties": { "key": { - "description": "" + "description": "The tag key." }, "value": { - "description": "" + "description": "The tag value." } } } }, "user_id": { - "description": "" + "description": "User that created this `model_version`." }, "version": { - "description": "" + "description": "Model's version number." } } } }, "name": { - "description": "" + "description": "Unique name for the model." }, "permissions": { "description": "", @@ -2632,32 +3178,32 @@ } }, "tags": { - "description": "", + "description": "Tags: Additional metadata key-value pairs for this `registered_model`.", "items": { "description": "", "properties": { "key": { - "description": "" + "description": "The tag key." }, "value": { - "description": "" + "description": "The tag value." } } } }, "user_id": { - "description": "" + "description": "User that created this `registered_model`" } } } }, "pipelines": { - "description": "List of pipeline definations", + "description": "List of DLT pipelines", "additionalproperties": { "description": "", "properties": { "catalog": { - "description": "Catalog in UC to add tables to. If target is specified, tables in this pipeline will be\npublished to a \"target\" schema inside catalog (i.e. \u003ccatalog\u003e.\u003ctarget\u003e.\u003ctable\u003e)." + "description": "A catalog in Unity Catalog to publish data from this pipeline to. If `target` is specified, tables in this pipeline are published to a `target` schema inside `catalog` (for example, `catalog`.`target`.`table`). If `target` is not specified, no data is published to Unity Catalog." }, "channel": { "description": "DLT Release Channel that specifies which version to use." 
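The pipeline descriptions filled in earlier in this change (`catalog`, `target`, `serverless`, `photon`, and the notebook/file library entries) can likewise be read together. The sketch below is hypothetical: it only lays out the documented keys side by side, and the names and paths are placeholders rather than values taken from this patch.

    # Illustrative only: a plain dict mirroring the DLT pipeline fields documented above.
    # Names and paths are placeholders, not values from this patch.
    import json

    pipeline = {
        "name": "example-pipeline",
        # With both catalog and target set, tables are published to Unity Catalog
        # as <catalog>.<target>.<table>; without catalog, nothing is published there.
        "catalog": "main",
        "target": "analytics",
        "serverless": True,
        "photon": True,
        "continuous": False,
        "libraries": [
            {"notebook": {"path": "/pipelines/ingest"}},
            {"file": {"path": "/pipelines/transform.sql"}},
        ],
    }

    print(json.dumps(pipeline, indent=2))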
@@ -2717,7 +3263,7 @@ } }, "azure_attributes": { - "description": "Attributes related to clusters running on Amazon Web Services.\nIf not specified at cluster creation, a set of default values will be used.", + "description": "Attributes related to clusters running on Microsoft Azure.\nIf not specified at cluster creation, a set of default values will be used.", "properties": { "availability": { "description": "" @@ -2742,7 +3288,7 @@ } }, "cluster_log_conf": { - "description": "The configuration for delivering spark logs to a long-term storage destination.\nTwo kinds of destinations (dbfs and s3) are supported. Only one destination can be specified\nfor one cluster. If the conf is given, the logs will be delivered to the destination every\n`5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while\nthe destination of executor logs is `$destination/$clusterId/executor`.", + "description": "The configuration for delivering spark logs to a long-term storage destination.\nOnly dbfs destinations are supported. Only one destination can be specified\nfor one cluster. If the conf is given, the logs will be delivered to the destination every\n`5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while\nthe destination of executor logs is `$destination/$clusterId/executor`.\n", "properties": { "dbfs": { "description": "destination needs to be provided. e.g.\n`{ \"dbfs\" : { \"destination\" : \"dbfs:/home/cluster_log\" } }`", @@ -2753,7 +3299,7 @@ } }, "s3": { - "description": "destination and either region or endpoint should also be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", + "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", "properties": { "canned_acl": { "description": "(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." @@ -2803,6 +3349,9 @@ }, "google_service_account": { "description": "If provided, the cluster will impersonate the google service account when accessing\ngcloud services (like GCS). The google service account\nmust have previously been added to the Databricks environment by an account\nadministrator." + }, + "local_ssd_count": { + "description": "If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type." 
} } }, @@ -2810,7 +3359,7 @@ "description": "The optional ID of the instance pool to which the cluster belongs." }, "label": { - "description": "Cluster label" + "description": "A label for the cluster specification, either `default` to configure the default cluster, or `maintenance` to configure the maintenance cluster. This field is optional. The default value is `default`." }, "node_type_id": { "description": "This field encodes, through a single value, the resources available to each of\nthe Spark nodes in this cluster. For example, the Spark nodes can be provisioned\nand optimized for memory or compute intensive workloads. A list of available node\ntypes can be retrieved by using the :method:clusters/listNodeTypes API call.\n" @@ -2883,18 +3432,18 @@ "description": "", "properties": { "file": { - "description": "", + "description": "The path to a file that defines a pipeline and is stored in the Databricks Repos.\n", "properties": { "path": { - "description": "" + "description": "The absolute path of the file." } } }, "jar": { - "description": "URI of the jar to be installed. Currently only DBFS and S3 URIs are supported.\nFor example: `{ \"jar\": \"dbfs:/mnt/databricks/library.jar\" }` or\n`{ \"jar\": \"s3://my-bucket/library.jar\" }`.\nIf S3 is used, please make sure the cluster has read access on the library. You may need to\nlaunch the cluster with an IAM role to access the S3 URI." + "description": "URI of the jar to be installed. Currently only DBFS is supported.\n" }, "maven": { - "description": "Specification of a maven library to be installed. For example:\n`{ \"coordinates\": \"org.jsoup:jsoup:1.7.2\" }`", + "description": "Specification of a maven library to be installed.\n", "properties": { "coordinates": { "description": "Gradle-style maven coordinates. For example: \"org.jsoup:jsoup:1.7.2\"." @@ -2911,7 +3460,7 @@ } }, "notebook": { - "description": "The path to a notebook that defines a pipeline and is stored in the Databricks workspace.\nFor example: `{ \"notebook\" : { \"path\" : \"/my-pipeline-notebook-path\" } }`.\nCurrently, only Scala notebooks are supported, and pipelines must be defined in a package\ncell.", + "description": "The path to a notebook that defines a pipeline and is stored in the \u003cDatabricks\u003e workspace.\n", "properties": { "path": { "description": "The absolute path of the notebook." @@ -2919,7 +3468,7 @@ } }, "whl": { - "description": "URI of the wheel to be installed.\nFor example: `{ \"whl\": \"dbfs:/my/whl\" }` or `{ \"whl\": \"s3://my-bucket/whl\" }`.\nIf S3 is used, please make sure the cluster has read access on the library. You may need to\nlaunch the cluster with an IAM role to access the S3 URI." + "description": "URI of the wheel to be installed.\n" } } } @@ -2950,11 +3499,14 @@ "photon": { "description": "Whether Photon is enabled for this pipeline." }, + "serverless": { + "description": "Whether serverless compute is enabled for this pipeline." + }, "storage": { "description": "DBFS root directory for storing checkpoints and tables." }, "target": { - "description": "Target schema (database) to add tables in this pipeline to." + "description": "Target schema (database) to add tables in this pipeline to. If not specified, no data is published to the Hive metastore or Unity Catalog. To publish to Unity Catalog, also specify `catalog`." }, "trigger": { "description": "Which pipeline trigger to use. 
Deprecated: Use `continuous` instead.", @@ -2980,6 +3532,20 @@ } } }, + "variables": { + "description": "", + "additionalproperties": { + "description": "", + "properties": { + "default": { + "description": "" + }, + "description": { + "description": "" + } + } + } + }, "workspace": { "description": "Configures which workspace to connect to and locations for files, state, and similar locations within the workspace file tree.", "properties": { diff --git a/bundle/schema/openapi.go b/bundle/schema/openapi.go index 6c2944aab..9b4b27dd9 100644 --- a/bundle/schema/openapi.go +++ b/bundle/schema/openapi.go @@ -162,7 +162,7 @@ func (reader *OpenapiReader) jobsDocs() (*Docs, error) { // TODO: add description for id if needed. // Tracked in https://github.com/databricks/cli/issues/242 jobsDocs := &Docs{ - Description: "List of job definations", + Description: "List of Databricks jobs", AdditionalProperties: jobDocs, } return jobsDocs, nil @@ -177,12 +177,38 @@ func (reader *OpenapiReader) pipelinesDocs() (*Docs, error) { // TODO: Two fields in resources.Pipeline have the json tag id. Clarify the // semantics and then add a description if needed. (https://github.com/databricks/cli/issues/242) pipelinesDocs := &Docs{ - Description: "List of pipeline definations", + Description: "List of DLT pipelines", AdditionalProperties: pipelineDocs, } return pipelinesDocs, nil } +func (reader *OpenapiReader) experimentsDocs() (*Docs, error) { + experimentSpecSchema, err := reader.readResolvedSchema(SchemaPathPrefix + "ml.Experiment") + if err != nil { + return nil, err + } + experimentDocs := schemaToDocs(experimentSpecSchema) + experimentsDocs := &Docs{ + Description: "List of MLflow experiments", + AdditionalProperties: experimentDocs, + } + return experimentsDocs, nil +} + +func (reader *OpenapiReader) modelsDocs() (*Docs, error) { + modelSpecSchema, err := reader.readResolvedSchema(SchemaPathPrefix + "ml.Model") + if err != nil { + return nil, err + } + modelDocs := schemaToDocs(modelSpecSchema) + modelsDocs := &Docs{ + Description: "List of MLflow models", + AdditionalProperties: modelDocs, + } + return modelsDocs, nil +} + func (reader *OpenapiReader) ResourcesDocs() (*Docs, error) { jobsDocs, err := reader.jobsDocs() if err != nil { @@ -192,12 +218,22 @@ func (reader *OpenapiReader) ResourcesDocs() (*Docs, error) { if err != nil { return nil, err } + experimentsDocs, err := reader.experimentsDocs() + if err != nil { + return nil, err + } + modelsDocs, err := reader.modelsDocs() + if err != nil { + return nil, err + } return &Docs{ - Description: "Specification of databricks resources to instantiate", + Description: "Collection of Databricks resources to deploy.", Properties: map[string]*Docs{ - "jobs": jobsDocs, - "pipelines": pipelinesDocs, + "jobs": jobsDocs, + "pipelines": pipelinesDocs, + "experiments": experimentsDocs, + "models": modelsDocs, }, }, nil } From 47f4d30229f9054053a914f1de6a9a73e0b1bc14 Mon Sep 17 00:00:00 2001 From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com> Date: Fri, 7 Jul 2023 15:10:25 +0200 Subject: [PATCH 09/18] Make top level workspace optional in JSON schema (#562) ## Tests Tested manually. 
`"workspace"` is no longer a required field in the generated JSON schema Co-authored-by: Pieter Noordhuis --- bundle/config/root.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bundle/config/root.go b/bundle/config/root.go index 8e8325733..189b1a7fd 100644 --- a/bundle/config/root.go +++ b/bundle/config/root.go @@ -36,7 +36,7 @@ type Root struct { // Workspace contains details about the workspace to connect to // and paths in the workspace tree to use for this bundle. - Workspace Workspace `json:"workspace"` + Workspace Workspace `json:"workspace,omitempty"` // Artifacts contains a description of all code artifacts in this bundle. Artifacts map[string]*Artifact `json:"artifacts,omitempty"` From e11704618cafc5a5893d3d4f256744287cfb44a1 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Mon, 10 Jul 2023 13:26:19 +0200 Subject: [PATCH 10/18] Release v0.200.2 (#564) ## Changes CLI: * Fix secrets put-secret command ([#545](https://github.com/databricks/cli/pull/545)). * Fixed ignoring required positional parameters when --json flag is provided ([#535](https://github.com/databricks/cli/pull/535)). * Update cp help message to not require file scheme ([#554](https://github.com/databricks/cli/pull/554)). Bundles: * Fix: bundle destroy fails when bundle.tf.json file is deleted ([#519](https://github.com/databricks/cli/pull/519)). * Fixed error reporting when included invalid files in include section ([#543](https://github.com/databricks/cli/pull/543)). * Make top level workspace optional in JSON schema ([#562](https://github.com/databricks/cli/pull/562)). * Propagate TF_CLI_CONFIG_FILE env variable ([#555](https://github.com/databricks/cli/pull/555)). * Update Terraform provider schema structs ([#563](https://github.com/databricks/cli/pull/563)). * Update inline JSON schema documentation ([#557](https://github.com/databricks/cli/pull/557)). Dependencies: * Bump Go SDK to v0.12.0 ([#540](https://github.com/databricks/cli/pull/540)). * Bump github.com/hashicorp/terraform-json from 0.17.0 to 0.17.1 ([#541](https://github.com/databricks/cli/pull/541)). --- CHANGELOG.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2eae4ac1c..0a7ed72bd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,24 @@ # Version changelog +## 0.200.2 + +CLI: +* Fix secrets put-secret command ([#545](https://github.com/databricks/cli/pull/545)). +* Fixed ignoring required positional parameters when --json flag is provided ([#535](https://github.com/databricks/cli/pull/535)). +* Update cp help message to not require file scheme ([#554](https://github.com/databricks/cli/pull/554)). + +Bundles: +* Fix: bundle destroy fails when bundle.tf.json file is deleted ([#519](https://github.com/databricks/cli/pull/519)). +* Fixed error reporting when included invalid files in include section ([#543](https://github.com/databricks/cli/pull/543)). +* Make top level workspace optional in JSON schema ([#562](https://github.com/databricks/cli/pull/562)). +* Propagate TF_CLI_CONFIG_FILE env variable ([#555](https://github.com/databricks/cli/pull/555)). +* Update Terraform provider schema structs ([#563](https://github.com/databricks/cli/pull/563)). +* Update inline JSON schema documentation ([#557](https://github.com/databricks/cli/pull/557)). + +Dependencies: +* Bump Go SDK to v0.12.0 ([#540](https://github.com/databricks/cli/pull/540)). +* Bump github.com/hashicorp/terraform-json from 0.17.0 to 0.17.1 ([#541](https://github.com/databricks/cli/pull/541)). 
+
 ## 0.200.1

 CLI:

From 57e75d3e22f9deb0c0e05c3794d7a12f8220f5bc Mon Sep 17 00:00:00 2001
From: "Lennart Kats (databricks)"
Date: Wed, 12 Jul 2023 08:51:54 +0200
Subject: [PATCH 11/18] Add development runs (#522)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This implements the "development run" functionality that we desire for DABs in the workspace / IDE.

## bundle.yml changes

In bundle.yml, there should be a "dev" environment that is marked as `mode: development`:

```
environments:
  dev:
    default: true
    mode: development # future accepted values might include pull_request, production
```

Setting `mode` to `development` indicates that this environment is used just for running things for development. This results in several changes to deployed assets:
* All assets will get '[dev]' in their name and will get a 'dev' tag
* All assets will be hidden from the list of assets (future work; e.g. for jobs we would have a special job_type that hides it from the list)
* All deployed assets will be ephemeral (future work, we need some form of garbage collection)
* Pipelines will be marked as 'development: true'
* Jobs can run on development compute through the `--compute` parameter in the CLI
* Jobs get their schedule / triggers paused
* Jobs get concurrent runs (it's really annoying if your runs get skipped because the last run was still in progress)

Other accepted values for `mode` are `default` (which does nothing) and `pull-request` (which is reserved for future use).

## CLI changes

To run a single job called "shark_sighting" on existing compute, use the following commands:

```
$ databricks bundle deploy --compute 0617-201942-9yd9g8ix
$ databricks bundle run shark_sighting
```

which would deploy and run a job called "[dev] shark_sighting" on the compute provided. Note that `--compute` is not accepted in production environments, so we show an error if `mode: development` is not used.

The `run --deploy` command offers a convenient shorthand for the common combination of deploying & running:

```
$ export DATABRICKS_COMPUTE=0617-201942-9yd9g8ix
$ bundle run --deploy shark_sightings
```

The `--deploy` addition isn't really essential and I welcome feedback 🤔 I played with the idea of a "debug" or "dev" command, but that seemed to only make the option space even broader for users. The above could work well with an IDE or workspace that automatically sets the target compute.

One more thing I added is that `run --no-wait` can now be used to run something without waiting for it to complete (useful for IDE-like environments that can display progress themselves).
``` $ bundle run --deploy shark_sightings --no-wait ``` --- bundle/config/bundle.go | 9 ++ bundle/config/environment.go | 16 +++ bundle/config/mutator/override_compute.go | 56 ++++++++ .../config/mutator/override_compute_test.go | 134 ++++++++++++++++++ .../mutator/process_environment_mode.go | 89 ++++++++++++ .../mutator/process_environment_mode_test.go | 77 ++++++++++ bundle/config/root.go | 8 ++ bundle/config/root_test.go | 9 ++ bundle/phases/initialize.go | 2 + bundle/run/job.go | 9 ++ bundle/run/options.go | 1 + bundle/run/pipeline.go | 4 + bundle/tests/job_and_pipeline/bundle.yml | 1 + bundle/tests/job_and_pipeline_test.go | 2 + cmd/bundle/deploy.go | 3 + cmd/bundle/run.go | 4 + 16 files changed, 424 insertions(+) create mode 100644 bundle/config/mutator/override_compute.go create mode 100644 bundle/config/mutator/override_compute_test.go create mode 100644 bundle/config/mutator/process_environment_mode.go create mode 100644 bundle/config/mutator/process_environment_mode_test.go diff --git a/bundle/config/bundle.go b/bundle/config/bundle.go index ba173f101..cf3864775 100644 --- a/bundle/config/bundle.go +++ b/bundle/config/bundle.go @@ -28,4 +28,13 @@ type Bundle struct { // Contains Git information like current commit, current branch and // origin url. Automatically loaded by reading .git directory if not specified Git Git `json:"git,omitempty"` + + // Determines the mode of the environment. + // For example, 'mode: development' can be used for deployments for + // development purposes. + // Annotated readonly as this should be set at the environment level. + Mode Mode `json:"mode,omitempty" bundle:"readonly"` + + // Overrides the compute used for jobs and other supported assets. + ComputeID string `json:"compute_id,omitempty"` } diff --git a/bundle/config/environment.go b/bundle/config/environment.go index 02c6e08c7..06a8d8909 100644 --- a/bundle/config/environment.go +++ b/bundle/config/environment.go @@ -1,5 +1,7 @@ package config +type Mode string + // Environment defines overrides for a single environment. // This structure is recursively merged into the root configuration. type Environment struct { @@ -7,6 +9,14 @@ type Environment struct { // by the user (through environment variable or command line argument). Default bool `json:"default,omitempty"` + // Determines the mode of the environment. + // For example, 'mode: development' can be used for deployments for + // development purposes. + Mode Mode `json:"mode,omitempty"` + + // Overrides the compute used for jobs and other supported assets. + ComputeID string `json:"compute_id,omitempty"` + Bundle *Bundle `json:"bundle,omitempty"` Workspace *Workspace `json:"workspace,omitempty"` @@ -20,3 +30,9 @@ type Environment struct { // in the scope of an environment Variables map[string]string `json:"variables,omitempty"` } + +const ( + // Right now, we just have a default / "" mode and a "development" mode. + // Additional modes are expected to come for pull-requests and production. 
+ Development Mode = "development" +) diff --git a/bundle/config/mutator/override_compute.go b/bundle/config/mutator/override_compute.go new file mode 100644 index 000000000..ba3fd9940 --- /dev/null +++ b/bundle/config/mutator/override_compute.go @@ -0,0 +1,56 @@ +package mutator + +import ( + "context" + "fmt" + "os" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/resources" +) + +type overrideCompute struct{} + +func OverrideCompute() bundle.Mutator { + return &overrideCompute{} +} + +func (m *overrideCompute) Name() string { + return "OverrideCompute" +} + +func overrideJobCompute(j *resources.Job, compute string) { + for i := range j.Tasks { + task := &j.Tasks[i] + if task.NewCluster != nil { + task.NewCluster = nil + task.ExistingClusterId = compute + } else if task.ExistingClusterId != "" { + task.ExistingClusterId = compute + } + } +} + +func (m *overrideCompute) Apply(ctx context.Context, b *bundle.Bundle) error { + if b.Config.Bundle.Mode != config.Development { + if b.Config.Bundle.ComputeID != "" { + return fmt.Errorf("cannot override compute for an environment that does not use 'mode: development'") + } + return nil + } + if os.Getenv("DATABRICKS_CLUSTER_ID") != "" { + b.Config.Bundle.ComputeID = os.Getenv("DATABRICKS_CLUSTER_ID") + } + + if b.Config.Bundle.ComputeID == "" { + return nil + } + + r := b.Config.Resources + for i := range r.Jobs { + overrideJobCompute(r.Jobs[i], b.Config.Bundle.ComputeID) + } + + return nil +} diff --git a/bundle/config/mutator/override_compute_test.go b/bundle/config/mutator/override_compute_test.go new file mode 100644 index 000000000..9eb99edb9 --- /dev/null +++ b/bundle/config/mutator/override_compute_test.go @@ -0,0 +1,134 @@ +package mutator_test + +import ( + "context" + "os" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/mutator" + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/databricks-sdk-go/service/compute" + "github.com/databricks/databricks-sdk-go/service/jobs" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestOverrideDevelopment(t *testing.T) { + os.Setenv("DATABRICKS_CLUSTER_ID", "") + bundle := &bundle.Bundle{ + Config: config.Root{ + Bundle: config.Bundle{ + Mode: config.Development, + ComputeID: "newClusterID", + }, + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "job1": {JobSettings: &jobs.JobSettings{ + Name: "job1", + Tasks: []jobs.Task{ + { + NewCluster: &compute.ClusterSpec{}, + }, + { + ExistingClusterId: "cluster2", + }, + }, + }}, + }, + }, + }, + } + + m := mutator.OverrideCompute() + err := m.Apply(context.Background(), bundle) + require.NoError(t, err) + assert.Nil(t, bundle.Config.Resources.Jobs["job1"].Tasks[0].NewCluster) + assert.Equal(t, "newClusterID", bundle.Config.Resources.Jobs["job1"].Tasks[0].ExistingClusterId) + assert.Equal(t, "newClusterID", bundle.Config.Resources.Jobs["job1"].Tasks[1].ExistingClusterId) +} + +func TestOverrideDevelopmentEnv(t *testing.T) { + os.Setenv("DATABRICKS_CLUSTER_ID", "newClusterId") + bundle := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "job1": {JobSettings: &jobs.JobSettings{ + Name: "job1", + Tasks: []jobs.Task{ + { + NewCluster: &compute.ClusterSpec{}, + }, + { + ExistingClusterId: "cluster2", + }, + }, + }}, + }, + }, + }, + } + + m := 
mutator.OverrideCompute() + err := m.Apply(context.Background(), bundle) + require.NoError(t, err) + assert.Equal(t, "cluster2", bundle.Config.Resources.Jobs["job1"].Tasks[1].ExistingClusterId) +} + +func TestOverrideProduction(t *testing.T) { + bundle := &bundle.Bundle{ + Config: config.Root{ + Bundle: config.Bundle{ + ComputeID: "newClusterID", + }, + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "job1": {JobSettings: &jobs.JobSettings{ + Name: "job1", + Tasks: []jobs.Task{ + { + NewCluster: &compute.ClusterSpec{}, + }, + { + ExistingClusterId: "cluster2", + }, + }, + }}, + }, + }, + }, + } + + m := mutator.OverrideCompute() + err := m.Apply(context.Background(), bundle) + require.Error(t, err) +} + +func TestOverrideProductionEnv(t *testing.T) { + os.Setenv("DATABRICKS_CLUSTER_ID", "newClusterId") + bundle := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "job1": {JobSettings: &jobs.JobSettings{ + Name: "job1", + Tasks: []jobs.Task{ + { + NewCluster: &compute.ClusterSpec{}, + }, + { + ExistingClusterId: "cluster2", + }, + }, + }}, + }, + }, + }, + } + + m := mutator.OverrideCompute() + err := m.Apply(context.Background(), bundle) + require.NoError(t, err) +} diff --git a/bundle/config/mutator/process_environment_mode.go b/bundle/config/mutator/process_environment_mode.go new file mode 100644 index 000000000..3e1b7e819 --- /dev/null +++ b/bundle/config/mutator/process_environment_mode.go @@ -0,0 +1,89 @@ +package mutator + +import ( + "context" + "fmt" + "path" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/databricks-sdk-go/service/jobs" + "github.com/databricks/databricks-sdk-go/service/ml" +) + +type processEnvironmentMode struct{} + +const developmentConcurrentRuns = 4 + +func ProcessEnvironmentMode() bundle.Mutator { + return &processEnvironmentMode{} +} + +func (m *processEnvironmentMode) Name() string { + return "ProcessEnvironmentMode" +} + +// Mark all resources as being for 'development' purposes, i.e. +// changing their their name, adding tags, and (in the future) +// marking them as 'hidden' in the UI. +func processDevelopmentMode(b *bundle.Bundle) error { + r := b.Config.Resources + + for i := range r.Jobs { + r.Jobs[i].Name = "[dev] " + r.Jobs[i].Name + if r.Jobs[i].Tags == nil { + r.Jobs[i].Tags = make(map[string]string) + } + r.Jobs[i].Tags["dev"] = "" + if r.Jobs[i].MaxConcurrentRuns == 0 { + r.Jobs[i].MaxConcurrentRuns = developmentConcurrentRuns + } + if r.Jobs[i].Schedule != nil { + r.Jobs[i].Schedule.PauseStatus = jobs.PauseStatusPaused + } + if r.Jobs[i].Continuous != nil { + r.Jobs[i].Continuous.PauseStatus = jobs.PauseStatusPaused + } + if r.Jobs[i].Trigger != nil { + r.Jobs[i].Trigger.PauseStatus = jobs.PauseStatusPaused + } + } + + for i := range r.Pipelines { + r.Pipelines[i].Name = "[dev] " + r.Pipelines[i].Name + r.Pipelines[i].Development = true + // (pipelines don't yet support tags) + } + + for i := range r.Models { + r.Models[i].Name = "[dev] " + r.Models[i].Name + r.Models[i].Tags = append(r.Models[i].Tags, ml.ModelTag{Key: "dev", Value: ""}) + } + + for i := range r.Experiments { + filepath := r.Experiments[i].Name + dir := path.Dir(filepath) + base := path.Base(filepath) + if dir == "." 
{ + r.Experiments[i].Name = "[dev] " + base + } else { + r.Experiments[i].Name = dir + "/[dev] " + base + } + r.Experiments[i].Tags = append(r.Experiments[i].Tags, ml.ExperimentTag{Key: "dev", Value: ""}) + } + + return nil +} + +func (m *processEnvironmentMode) Apply(ctx context.Context, b *bundle.Bundle) error { + switch b.Config.Bundle.Mode { + case config.Development: + return processDevelopmentMode(b) + case "": + // No action + default: + return fmt.Errorf("unsupported value specified for 'mode': %s", b.Config.Bundle.Mode) + } + + return nil +} diff --git a/bundle/config/mutator/process_environment_mode_test.go b/bundle/config/mutator/process_environment_mode_test.go new file mode 100644 index 000000000..5342de212 --- /dev/null +++ b/bundle/config/mutator/process_environment_mode_test.go @@ -0,0 +1,77 @@ +package mutator_test + +import ( + "context" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/mutator" + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/databricks-sdk-go/service/jobs" + "github.com/databricks/databricks-sdk-go/service/ml" + "github.com/databricks/databricks-sdk-go/service/pipelines" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestProcessEnvironmentModeApplyDebug(t *testing.T) { + bundle := &bundle.Bundle{ + Config: config.Root{ + Bundle: config.Bundle{ + Mode: config.Development, + }, + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "job1": {JobSettings: &jobs.JobSettings{Name: "job1"}}, + }, + Pipelines: map[string]*resources.Pipeline{ + "pipeline1": {PipelineSpec: &pipelines.PipelineSpec{Name: "pipeline1"}}, + }, + Experiments: map[string]*resources.MlflowExperiment{ + "experiment1": {Experiment: &ml.Experiment{Name: "/Users/lennart.kats@databricks.com/experiment1"}}, + "experiment2": {Experiment: &ml.Experiment{Name: "experiment2"}}, + }, + Models: map[string]*resources.MlflowModel{ + "model1": {Model: &ml.Model{Name: "model1"}}, + }, + }, + }, + } + + m := mutator.ProcessEnvironmentMode() + err := m.Apply(context.Background(), bundle) + require.NoError(t, err) + assert.Equal(t, "[dev] job1", bundle.Config.Resources.Jobs["job1"].Name) + assert.Equal(t, "[dev] pipeline1", bundle.Config.Resources.Pipelines["pipeline1"].Name) + assert.Equal(t, "/Users/lennart.kats@databricks.com/[dev] experiment1", bundle.Config.Resources.Experiments["experiment1"].Name) + assert.Equal(t, "[dev] experiment2", bundle.Config.Resources.Experiments["experiment2"].Name) + assert.Equal(t, "[dev] model1", bundle.Config.Resources.Models["model1"].Name) + assert.Equal(t, "dev", bundle.Config.Resources.Experiments["experiment1"].Experiment.Tags[0].Key) + assert.True(t, bundle.Config.Resources.Pipelines["pipeline1"].PipelineSpec.Development) +} + +func TestProcessEnvironmentModeApplyDefault(t *testing.T) { + bundle := &bundle.Bundle{ + Config: config.Root{ + Bundle: config.Bundle{ + Mode: "", + }, + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "job1": {JobSettings: &jobs.JobSettings{Name: "job1"}}, + }, + Pipelines: map[string]*resources.Pipeline{ + "pipeline1": {PipelineSpec: &pipelines.PipelineSpec{Name: "pipeline1"}}, + }, + }, + }, + } + + m := mutator.ProcessEnvironmentMode() + err := m.Apply(context.Background(), bundle) + require.NoError(t, err) + assert.Equal(t, "job1", bundle.Config.Resources.Jobs["job1"].Name) + assert.Equal(t, "pipeline1", 
bundle.Config.Resources.Pipelines["pipeline1"].Name) + assert.False(t, bundle.Config.Resources.Pipelines["pipeline1"].PipelineSpec.Development) +} diff --git a/bundle/config/root.go b/bundle/config/root.go index 189b1a7fd..5ee337d30 100644 --- a/bundle/config/root.go +++ b/bundle/config/root.go @@ -190,5 +190,13 @@ func (r *Root) MergeEnvironment(env *Environment) error { } } + if env.Mode != "" { + r.Bundle.Mode = env.Mode + } + + if env.ComputeID != "" { + r.Bundle.ComputeID = env.ComputeID + } + return nil } diff --git a/bundle/config/root_test.go b/bundle/config/root_test.go index b53f1ab77..818e89a2d 100644 --- a/bundle/config/root_test.go +++ b/bundle/config/root_test.go @@ -154,3 +154,12 @@ func TestInitializeVariablesUndefinedVariables(t *testing.T) { err := root.InitializeVariables([]string{"bar=567"}) assert.ErrorContains(t, err, "variable bar has not been defined") } + +func TestRootMergeEnvironmentWithMode(t *testing.T) { + root := &Root{ + Bundle: Bundle{}, + } + env := &Environment{Mode: Development} + require.NoError(t, root.MergeEnvironment(env)) + assert.Equal(t, Development, root.Bundle.Mode) +} diff --git a/bundle/phases/initialize.go b/bundle/phases/initialize.go index 36d251547..fc5056f63 100644 --- a/bundle/phases/initialize.go +++ b/bundle/phases/initialize.go @@ -25,6 +25,8 @@ func Initialize() bundle.Mutator { interpolation.IncludeLookupsInPath("workspace"), interpolation.IncludeLookupsInPath(variable.VariableReferencePrefix), ), + mutator.OverrideCompute(), + mutator.ProcessEnvironmentMode(), mutator.TranslatePaths(), terraform.Initialize(), }, diff --git a/bundle/run/job.go b/bundle/run/job.go index b5ada9461..f152a17d0 100644 --- a/bundle/run/job.go +++ b/bundle/run/job.go @@ -243,6 +243,15 @@ func (r *jobRunner) Run(ctx context.Context, opts *Options) (output.RunOutput, e if err != nil { return nil, fmt.Errorf("cannot start job") } + + if opts.NoWait { + details, err := w.Jobs.GetRun(ctx, jobs.GetRunRequest{ + RunId: waiter.RunId, + }) + progressLogger.Log(progress.NewJobRunUrlEvent(details.RunPageUrl)) + return nil, err + } + run, err := waiter.OnProgress(func(r *jobs.Run) { pullRunId(r) logDebug(r) diff --git a/bundle/run/options.go b/bundle/run/options.go index cc9dd413e..3194fb328 100644 --- a/bundle/run/options.go +++ b/bundle/run/options.go @@ -7,6 +7,7 @@ import ( type Options struct { Job JobOptions Pipeline PipelineOptions + NoWait bool } func (o *Options) Define(fs *flag.FlagSet) { diff --git a/bundle/run/pipeline.go b/bundle/run/pipeline.go index 621da0715..7b82c3eae 100644 --- a/bundle/run/pipeline.go +++ b/bundle/run/pipeline.go @@ -170,6 +170,10 @@ func (r *pipelineRunner) Run(ctx context.Context, opts *Options) (output.RunOutp // Log the pipeline update URL as soon as it is available. progressLogger.Log(progress.NewPipelineUpdateUrlEvent(w.Config.Host, updateID, pipelineID)) + if opts.NoWait { + return nil, nil + } + // Poll update for completion and post status. // Note: there is no "StartUpdateAndWait" wrapper for this API. 
var prevState *pipelines.UpdateInfoState diff --git a/bundle/tests/job_and_pipeline/bundle.yml b/bundle/tests/job_and_pipeline/bundle.yml index f4a5719af..d6942e8a7 100644 --- a/bundle/tests/job_and_pipeline/bundle.yml +++ b/bundle/tests/job_and_pipeline/bundle.yml @@ -8,6 +8,7 @@ resources: environments: development: + mode: development resources: pipelines: nyc_taxi_pipeline: diff --git a/bundle/tests/job_and_pipeline_test.go b/bundle/tests/job_and_pipeline_test.go index 8fc032a5f..775f415c2 100644 --- a/bundle/tests/job_and_pipeline_test.go +++ b/bundle/tests/job_and_pipeline_test.go @@ -4,6 +4,7 @@ import ( "path/filepath" "testing" + "github.com/databricks/cli/bundle/config" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -15,6 +16,7 @@ func TestJobAndPipelineDevelopment(t *testing.T) { p := b.Config.Resources.Pipelines["nyc_taxi_pipeline"] assert.Equal(t, "job_and_pipeline/bundle.yml", filepath.ToSlash(p.ConfigFilePath)) + assert.Equal(t, b.Config.Bundle.Mode, config.Development) assert.True(t, p.Development) require.Len(t, p.Libraries, 1) assert.Equal(t, "./dlt/nyc_taxi_loader", p.Libraries[0].Notebook.Path) diff --git a/cmd/bundle/deploy.go b/cmd/bundle/deploy.go index 7dee32da3..e8c0d3958 100644 --- a/cmd/bundle/deploy.go +++ b/cmd/bundle/deploy.go @@ -16,6 +16,7 @@ var deployCmd = &cobra.Command{ // If `--force` is specified, force acquisition of the deployment lock. b.Config.Bundle.Lock.Force = forceDeploy + b.Config.Bundle.ComputeID = computeID return bundle.Apply(cmd.Context(), b, bundle.Seq( phases.Initialize(), @@ -26,8 +27,10 @@ var deployCmd = &cobra.Command{ } var forceDeploy bool +var computeID string func init() { AddCommand(deployCmd) deployCmd.Flags().BoolVar(&forceDeploy, "force", false, "Force acquisition of deployment lock.") + deployCmd.Flags().StringVarP(&computeID, "compute-id", "c", "", "Override compute in the deployment with the given compute ID.") } diff --git a/cmd/bundle/run.go b/cmd/bundle/run.go index 1eb7aa4ba..9ca8fe456 100644 --- a/cmd/bundle/run.go +++ b/cmd/bundle/run.go @@ -14,6 +14,7 @@ import ( ) var runOptions run.Options +var noWait bool var runCmd = &cobra.Command{ Use: "run [flags] KEY", @@ -23,6 +24,7 @@ var runCmd = &cobra.Command{ PreRunE: ConfigureBundleWithVariables, RunE: func(cmd *cobra.Command, args []string) error { b := bundle.Get(cmd.Context()) + err := bundle.Apply(cmd.Context(), b, bundle.Seq( phases.Initialize(), terraform.Interpolate(), @@ -39,6 +41,7 @@ var runCmd = &cobra.Command{ return err } + runOptions.NoWait = noWait output, err := runner.Run(cmd.Context(), &runOptions) if err != nil { return err @@ -89,4 +92,5 @@ var runCmd = &cobra.Command{ func init() { runOptions.Define(runCmd.Flags()) rootCmd.AddCommand(runCmd) + runCmd.Flags().BoolVar(&noWait, "no-wait", false, "Don't wait for the run to complete.") } From f203731fe69b047b0d26fdd28c422fe5af8e901c Mon Sep 17 00:00:00 2001 From: Miles Yucht Date: Wed, 12 Jul 2023 12:05:51 +0200 Subject: [PATCH 12/18] Support tab completion for profiles (#572) ## Changes Currently, `databricks --profile ` autocompletes with the shell default behavior, listing files in the local directory. This is not a great experience. Especially given that the suggested profile names for accounts are so long, it can be cumbersome to type them out by hand. This PR configures autocompletion for `--profile` to inspect the profiles of ~/.databrickscfg. 
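For illustration, here is a minimal, self-contained sketch of the cobra completion mechanism this relies on. It is not the code in this PR: the command name and the candidate profile names below are placeholders, and the actual implementation loads profile names from ~/.databrickscfg instead of hard-coding them.

```go
package main

import (
	"fmt"

	"github.com/spf13/cobra"
)

func main() {
	cmd := &cobra.Command{
		Use: "example",
		RunE: func(cmd *cobra.Command, args []string) error {
			profile, err := cmd.Flags().GetString("profile")
			if err != nil {
				return err
			}
			fmt.Println("using profile:", profile)
			return nil
		},
	}

	cmd.PersistentFlags().StringP("profile", "p", "", "configuration profile")

	// Return candidate values for --profile. NoFileComp suppresses the
	// shell's default fallback of completing local file names.
	_ = cmd.RegisterFlagCompletionFunc("profile", func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) {
		// Placeholder names; a real implementation would read ~/.databrickscfg.
		return []string{"DEFAULT", "STAGING", "ACCOUNT-1234"}, cobra.ShellCompDirectiveNoFileComp
	})

	_ = cmd.Execute()
}
```

Returning `ShellCompDirectiveNoFileComp` is what replaces the file-listing behavior described above with the list of profile names.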
One potential improvement is to filter the response based on whether the command is known to be account-level or workspace-level. ## Tests Manual test. Screenshot_11_07_2023__18_31 --- cmd/root/auth.go | 1 + libs/databrickscfg/profiles.go | 13 +++++++++++++ 2 files changed, 14 insertions(+) diff --git a/cmd/root/auth.go b/cmd/root/auth.go index 61068ab38..ae7f73968 100644 --- a/cmd/root/auth.go +++ b/cmd/root/auth.go @@ -23,6 +23,7 @@ var currentUser int func init() { RootCmd.PersistentFlags().StringP("profile", "p", "", "~/.databrickscfg profile") + RootCmd.RegisterFlagCompletionFunc("profile", databrickscfg.ProfileCompletion) } func MustAccountClient(cmd *cobra.Command, args []string) error { diff --git a/libs/databrickscfg/profiles.go b/libs/databrickscfg/profiles.go index 60b2a89a2..7892bddd1 100644 --- a/libs/databrickscfg/profiles.go +++ b/libs/databrickscfg/profiles.go @@ -5,6 +5,7 @@ import ( "strings" "github.com/databricks/databricks-sdk-go/config" + "github.com/spf13/cobra" ) // Profile holds a subset of the keys in a databrickscfg profile. @@ -59,6 +60,10 @@ func MatchAccountProfiles(p Profile) bool { return p.Host != "" && p.AccountID != "" } +func MatchAllProfiles(p Profile) bool { + return true +} + const DefaultPath = "~/.databrickscfg" func LoadProfiles(path string, fn ProfileMatchFunction) (file string, profiles Profiles, err error) { @@ -99,3 +104,11 @@ func LoadProfiles(path string, fn ProfileMatchFunction) (file string, profiles P return } + +func ProfileCompletion(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) { + _, profiles, err := LoadProfiles(DefaultPath, MatchAllProfiles) + if err != nil { + return nil, cobra.ShellCompDirectiveError + } + return profiles.Names(), cobra.ShellCompDirectiveNoFileComp +} From d2b7c6d611b562713a87339f04a544a6919d57fa Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 12 Jul 2023 11:36:07 +0000 Subject: [PATCH 13/18] Bump golang.org/x/term from 0.9.0 to 0.10.0 (#567) --- go.mod | 4 ++-- go.sum | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/go.mod b/go.mod index c1d836696..6abeb918d 100644 --- a/go.mod +++ b/go.mod @@ -26,7 +26,7 @@ require ( golang.org/x/mod v0.11.0 golang.org/x/oauth2 v0.9.0 golang.org/x/sync v0.3.0 - golang.org/x/term v0.9.0 + golang.org/x/term v0.10.0 golang.org/x/text v0.10.0 gopkg.in/ini.v1 v1.67.0 // Apache 2.0 ) @@ -52,7 +52,7 @@ require ( go.opencensus.io v0.24.0 // indirect golang.org/x/crypto v0.10.0 // indirect golang.org/x/net v0.11.0 // indirect - golang.org/x/sys v0.9.0 // indirect + golang.org/x/sys v0.10.0 // indirect golang.org/x/time v0.3.0 // indirect google.golang.org/api v0.129.0 // indirect google.golang.org/appengine v1.6.7 // indirect diff --git a/go.sum b/go.sum index 076e9f282..4e8131d5b 100644 --- a/go.sum +++ b/go.sum @@ -217,12 +217,12 @@ golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.9.0 h1:KS/R3tvhPqvJvwcKfnBHJwwthS11LRhmM5D59eEXa0s= -golang.org/x/sys v0.9.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.10.0 h1:SqMFp9UcQJZa+pmYuAKjd9xq1f0j5rLcDIk0mj4qAsA= +golang.org/x/sys v0.10.0/go.mod 
h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= -golang.org/x/term v0.9.0 h1:GRRCnKYhdQrD8kfRAdQ6Zcw1P0OcELxGLKJvtjVMZ28= -golang.org/x/term v0.9.0/go.mod h1:M6DEAAIenWoTxdKrOltXcmDY3rSplQUkrvaDU5FcQyo= +golang.org/x/term v0.10.0 h1:3R7pNqamzBraeqj/Tj8qt1aQ2HpmlC+Cx/qL/7hn4/c= +golang.org/x/term v0.10.0/go.mod h1:lpqdcUyK/oCiQxvxVrppt5ggO2KCZ5QblwqPnfZ6d5o= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= From d28133fd3ef45e3d84185dba3fa27aa561963b90 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 12 Jul 2023 11:42:46 +0000 Subject: [PATCH 14/18] Bump golang.org/x/oauth2 from 0.9.0 to 0.10.0 (#566) --- go.mod | 8 ++++---- go.sum | 16 ++++++++-------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/go.mod b/go.mod index 6abeb918d..339f31ec7 100644 --- a/go.mod +++ b/go.mod @@ -24,10 +24,10 @@ require ( github.com/whilp/git-urls v1.0.0 // MIT golang.org/x/exp v0.0.0-20230310171629-522b1b587ee0 golang.org/x/mod v0.11.0 - golang.org/x/oauth2 v0.9.0 + golang.org/x/oauth2 v0.10.0 golang.org/x/sync v0.3.0 golang.org/x/term v0.10.0 - golang.org/x/text v0.10.0 + golang.org/x/text v0.11.0 gopkg.in/ini.v1 v1.67.0 // Apache 2.0 ) @@ -50,8 +50,8 @@ require ( github.com/pmezard/go-difflib v1.0.0 // indirect github.com/zclconf/go-cty v1.13.2 // indirect go.opencensus.io v0.24.0 // indirect - golang.org/x/crypto v0.10.0 // indirect - golang.org/x/net v0.11.0 // indirect + golang.org/x/crypto v0.11.0 // indirect + golang.org/x/net v0.12.0 // indirect golang.org/x/sys v0.10.0 // indirect golang.org/x/time v0.3.0 // indirect google.golang.org/api v0.129.0 // indirect diff --git a/go.sum b/go.sum index 4e8131d5b..89cf9ad40 100644 --- a/go.sum +++ b/go.sum @@ -163,8 +163,8 @@ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACk golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.0.0-20220314234659-1baeb1ce4c0b/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= -golang.org/x/crypto v0.10.0 h1:LKqV2xt9+kDzSTfOhx4FrkEBcMrAgHSYgzywV9zcGmM= -golang.org/x/crypto v0.10.0/go.mod h1:o4eNf7Ede1fv+hwOwZsTHl9EsPFO6q6ZvYR8vYfY45I= +golang.org/x/crypto v0.11.0 h1:6Ewdq3tDic1mg5xRO4milcWCfMVQhI4NkqWWvqejpuA= +golang.org/x/crypto v0.11.0/go.mod h1:xgJhtzW8F9jGdVFWZESrid1U1bjeNy4zgy5cRr/CIio= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20230310171629-522b1b587ee0 h1:LGJsf5LRplCck6jUCH3dBL2dmycNruWNF5xugkSlfXw= golang.org/x/exp v0.0.0-20230310171629-522b1b587ee0/go.mod h1:CxIveKay+FTh1D0yPZemJVgC/95VzuuOLq5Qi4xnoYc= @@ -187,12 +187,12 @@ golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwY golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net 
v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= -golang.org/x/net v0.11.0 h1:Gi2tvZIJyBtO9SDr1q9h5hEQCp/4L2RQ+ar0qjx2oNU= -golang.org/x/net v0.11.0/go.mod h1:2L/ixqYpgIVXmeoSA/4Lu7BzTG4KIyPIryS4IsOd1oQ= +golang.org/x/net v0.12.0 h1:cfawfvKITfUsFCeJIHJrbSxpeu/E81khclypR0GVT50= +golang.org/x/net v0.12.0/go.mod h1:zEVYFnQC7m/vmpQFELhcD1EWkZlX69l4oqgmer6hfKA= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= -golang.org/x/oauth2 v0.9.0 h1:BPpt2kU7oMRq3kCHAA1tbSEshXRw1LpG2ztgDwrzuAs= -golang.org/x/oauth2 v0.9.0/go.mod h1:qYgFZaFiu6Wg24azG8bdV52QJXJGbZzIIsRCdVKzbLw= +golang.org/x/oauth2 v0.10.0 h1:zHCpF2Khkwy4mMB4bv0U37YtJdTGW8jI0glAApi0Kh8= +golang.org/x/oauth2 v0.10.0/go.mod h1:kTpgurOux7LqtuxjuyZa4Gj2gdezIt/jQtGnNFfypQI= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -229,8 +229,8 @@ golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= -golang.org/x/text v0.10.0 h1:UpjohKhiEgNc0CSauXmwYftY1+LlaC75SJwh0SgCX58= -golang.org/x/text v0.10.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= +golang.org/x/text v0.11.0 h1:LAntKIrcmeSKERyiOh0XMV39LXS8IE9UL2yP7+f5ij4= +golang.org/x/text v0.11.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= golang.org/x/time v0.3.0 h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4= golang.org/x/time v0.3.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= From 14cfc80666a48a7d9faff2dc50d2c2701223476f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 12 Jul 2023 11:49:35 +0000 Subject: [PATCH 15/18] Bump golang.org/x/mod from 0.11.0 to 0.12.0 (#568) --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 339f31ec7..610404bb4 100644 --- a/go.mod +++ b/go.mod @@ -23,7 +23,7 @@ require ( github.com/stretchr/testify v1.8.4 // MIT github.com/whilp/git-urls v1.0.0 // MIT golang.org/x/exp v0.0.0-20230310171629-522b1b587ee0 - golang.org/x/mod v0.11.0 + golang.org/x/mod v0.12.0 golang.org/x/oauth2 v0.10.0 golang.org/x/sync v0.3.0 golang.org/x/term v0.10.0 diff --git a/go.sum b/go.sum index 89cf9ad40..176d39254 100644 --- a/go.sum +++ b/go.sum @@ -172,8 +172,8 @@ golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTk golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= -golang.org/x/mod v0.11.0 h1:bUO06HqtnRcc/7l71XBe4WcqTZ+3AH1J59zWDDwLKgU= -golang.org/x/mod v0.11.0/go.mod 
h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= +golang.org/x/mod v0.12.0 h1:rmsUpXtvNzj340zd98LZ4KntptpfRHwpFOHG188oHXc= +golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= From 650fb0e8b61ff1ad1cc52f76ee2b013635674821 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Wed, 12 Jul 2023 14:09:25 +0200 Subject: [PATCH 16/18] Correctly use --profile flag passed for all bundle commands (#571) ## Changes Correctly use --profile flag passed for all bundle commands. Also adds a validation that if bundle configured host mismatches provided profile, it throws an error. Co-authored-by: Pieter Noordhuis --- bundle/config/workspace.go | 12 ++-- cmd/root/bundle.go | 19 ++++++ cmd/root/bundle_test.go | 119 +++++++++++++++++++++++++++++++++++ libs/databrickscfg/loader.go | 2 +- libs/databrickscfg/ops.go | 24 +++++++ 5 files changed, 171 insertions(+), 5 deletions(-) create mode 100644 cmd/root/bundle_test.go diff --git a/bundle/config/workspace.go b/bundle/config/workspace.go index 8a1205b3b..1b6dc4cd5 100644 --- a/bundle/config/workspace.go +++ b/bundle/config/workspace.go @@ -75,12 +75,9 @@ func (w *Workspace) Client() (*databricks.WorkspaceClient, error) { AzureLoginAppID: w.AzureLoginAppID, } - // HACKY fix to not used host based auth when the profile is already set - profile := os.Getenv("DATABRICKS_CONFIG_PROFILE") - // If only the host is configured, we try and unambiguously match it to // a profile in the user's databrickscfg file. Override the default loaders. - if w.Host != "" && w.Profile == "" && profile == "" { + if w.Host != "" && w.Profile == "" { cfg.Loaders = []config.Loader{ // Load auth creds from env vars config.ConfigAttributes, @@ -91,6 +88,13 @@ func (w *Workspace) Client() (*databricks.WorkspaceClient, error) { } } + if w.Profile != "" && w.Host != "" { + err := databrickscfg.ValidateConfigAndProfileHost(&cfg, w.Profile) + if err != nil { + return nil, err + } + } + return databricks.NewWorkspaceClient(&cfg) } diff --git a/cmd/root/bundle.go b/cmd/root/bundle.go index 737651242..8eab7c2c7 100644 --- a/cmd/root/bundle.go +++ b/cmd/root/bundle.go @@ -26,6 +26,20 @@ func getEnvironment(cmd *cobra.Command) (value string) { return os.Getenv(envName) } +func getProfile(cmd *cobra.Command) (value string) { + // The command line flag takes precedence. + flag := cmd.Flag("profile") + if flag != nil { + value = flag.Value.String() + if value != "" { + return + } + } + + // If it's not set, use the environment variable. + return os.Getenv("DATABRICKS_CONFIG_PROFILE") +} + // loadBundle loads the bundle configuration and applies default mutators. 
func loadBundle(cmd *cobra.Command, args []string, load func() (*bundle.Bundle, error)) (*bundle.Bundle, error) { b, err := load() @@ -38,6 +52,11 @@ func loadBundle(cmd *cobra.Command, args []string, load func() (*bundle.Bundle, return nil, nil } + profile := getProfile(cmd) + if profile != "" { + b.Config.Workspace.Profile = profile + } + ctx := cmd.Context() err = bundle.Apply(ctx, b, bundle.Seq(mutator.DefaultMutators()...)) if err != nil { diff --git a/cmd/root/bundle_test.go b/cmd/root/bundle_test.go new file mode 100644 index 000000000..8dc771bd4 --- /dev/null +++ b/cmd/root/bundle_test.go @@ -0,0 +1,119 @@ +package root + +import ( + "context" + "os" + "path/filepath" + "runtime" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/stretchr/testify/assert" +) + +func setupDatabricksCfg(t *testing.T) { + tempHomeDir := t.TempDir() + homeEnvVar := "HOME" + if runtime.GOOS == "windows" { + homeEnvVar = "USERPROFILE" + } + + cfg := []byte("[PROFILE-1]\nhost = https://a.com\ntoken = a\n[PROFILE-2]\nhost = https://a.com\ntoken = b\n") + err := os.WriteFile(filepath.Join(tempHomeDir, ".databrickscfg"), cfg, 0644) + assert.NoError(t, err) + + t.Setenv("DATABRICKS_CONFIG_FILE", "") + t.Setenv(homeEnvVar, tempHomeDir) +} + +func setup(t *testing.T, host string) *bundle.Bundle { + setupDatabricksCfg(t) + + ctx := context.Background() + RootCmd.SetContext(ctx) + _, err := initializeLogger(ctx) + assert.NoError(t, err) + + err = configureBundle(RootCmd, []string{"validate"}, func() (*bundle.Bundle, error) { + return &bundle.Bundle{ + Config: config.Root{ + Bundle: config.Bundle{ + Name: "test", + }, + Workspace: config.Workspace{ + Host: host, + }, + }, + }, nil + }) + assert.NoError(t, err) + + return bundle.Get(RootCmd.Context()) +} + +func TestBundleConfigureDefault(t *testing.T) { + b := setup(t, "https://x.com") + assert.NotPanics(t, func() { + b.WorkspaceClient() + }) +} + +func TestBundleConfigureWithMultipleMatches(t *testing.T) { + b := setup(t, "https://a.com") + assert.Panics(t, func() { + b.WorkspaceClient() + }) +} + +func TestBundleConfigureWithNonExistentProfileFlag(t *testing.T) { + RootCmd.Flag("profile").Value.Set("NOEXIST") + + b := setup(t, "https://x.com") + assert.PanicsWithError(t, "no matching config profiles found", func() { + b.WorkspaceClient() + }) +} + +func TestBundleConfigureWithMismatchedProfile(t *testing.T) { + RootCmd.Flag("profile").Value.Set("PROFILE-1") + + b := setup(t, "https://x.com") + assert.PanicsWithError(t, "config host mismatch: profile uses host https://a.com, but CLI configured to use https://x.com", func() { + b.WorkspaceClient() + }) +} + +func TestBundleConfigureWithCorrectProfile(t *testing.T) { + RootCmd.Flag("profile").Value.Set("PROFILE-1") + + b := setup(t, "https://a.com") + assert.NotPanics(t, func() { + b.WorkspaceClient() + }) +} + +func TestBundleConfigureWithMismatchedProfileEnvVariable(t *testing.T) { + t.Setenv("DATABRICKS_CONFIG_PROFILE", "PROFILE-1") + t.Cleanup(func() { + t.Setenv("DATABRICKS_CONFIG_PROFILE", "") + }) + + b := setup(t, "https://x.com") + assert.PanicsWithError(t, "config host mismatch: profile uses host https://a.com, but CLI configured to use https://x.com", func() { + b.WorkspaceClient() + }) +} + +func TestBundleConfigureWithProfileFlagAndEnvVariable(t *testing.T) { + t.Setenv("DATABRICKS_CONFIG_PROFILE", "NOEXIST") + t.Cleanup(func() { + t.Setenv("DATABRICKS_CONFIG_PROFILE", "") + }) + RootCmd.Flag("profile").Value.Set("PROFILE-1") + + b := setup(t, 
"https://a.com") + assert.NotPanics(t, func() { + b.WorkspaceClient() + }) +} diff --git a/libs/databrickscfg/loader.go b/libs/databrickscfg/loader.go index 8179703a3..05698eb48 100644 --- a/libs/databrickscfg/loader.go +++ b/libs/databrickscfg/loader.go @@ -90,7 +90,7 @@ func (l profileFromHostLoader) Configure(cfg *config.Config) error { } if err, ok := err.(errMultipleProfiles); ok { return fmt.Errorf( - "%s: %w: please set DATABRICKS_CONFIG_PROFILE to specify one", + "%s: %w: please set DATABRICKS_CONFIG_PROFILE or provide --profile flag to specify one", host, err) } if err != nil { diff --git a/libs/databrickscfg/ops.go b/libs/databrickscfg/ops.go index 4a4a27b06..c2d6e9fa1 100644 --- a/libs/databrickscfg/ops.go +++ b/libs/databrickscfg/ops.go @@ -7,6 +7,7 @@ import ( "strings" "github.com/databricks/cli/libs/log" + "github.com/databricks/databricks-sdk-go" "github.com/databricks/databricks-sdk-go/config" "gopkg.in/ini.v1" ) @@ -129,6 +130,29 @@ func SaveToProfile(ctx context.Context, cfg *config.Config) error { return configFile.SaveTo(configFile.Path()) } +func ValidateConfigAndProfileHost(cfg *databricks.Config, profile string) error { + configFile, err := config.LoadFile(cfg.ConfigFile) + if err != nil { + return fmt.Errorf("cannot parse config file: %w", err) + } + // Normalized version of the configured host. + host := normalizeHost(cfg.Host) + match, err := findMatchingProfile(configFile, func(s *ini.Section) bool { + return profile == s.Name() + }) + + if err != nil { + return err + } + + hostFromProfile := normalizeHost(match.Key("host").Value()) + if hostFromProfile != "" && host != "" && hostFromProfile != host { + return fmt.Errorf("config host mismatch: profile uses host %s, but CLI configured to use %s", hostFromProfile, host) + } + + return nil +} + func init() { // We document databrickscfg files with a [DEFAULT] header and wish to keep it that way. // This, however, does mean we emit a [DEFAULT] section even if it's empty. From f00488d81d953068ba04c88a97e8d1055c27c309 Mon Sep 17 00:00:00 2001 From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com> Date: Wed, 12 Jul 2023 14:25:00 +0200 Subject: [PATCH 17/18] Disallow notebooks in paths where files are expected (#573) ## Changes Uploading a notebook strips it's file extension. This PR returns an error if a notebook is specified where a file is expected. For example: A spark python task in a job or `libraries.file.path` DLT library (where instead `libraries.notebook.path` should be used This PR also adds test coverage for the opposite case, when files are not notebooks where notebooks are expected. 
## Tests Integration tests and manually --- bundle/config/mutator/translate_paths.go | 39 ++++- bundle/config/mutator/translate_paths_test.go | 140 ++++++++++++++++++ 2 files changed, 175 insertions(+), 4 deletions(-) diff --git a/bundle/config/mutator/translate_paths.go b/bundle/config/mutator/translate_paths.go index a7ccb3e9e..08f839861 100644 --- a/bundle/config/mutator/translate_paths.go +++ b/bundle/config/mutator/translate_paths.go @@ -2,6 +2,7 @@ package mutator import ( "context" + "errors" "fmt" "os" "path" @@ -14,6 +15,22 @@ import ( "github.com/databricks/databricks-sdk-go/service/pipelines" ) +type ErrIsNotebook struct { + path string +} + +func (err ErrIsNotebook) Error() string { + return fmt.Sprintf("file at %s is a notebook", err.path) +} + +type ErrIsNotNotebook struct { + path string +} + +func (err ErrIsNotNotebook) Error() string { + return fmt.Sprintf("file at %s is not a notebook", err.path) +} + type translatePaths struct { seen map[string]string } @@ -86,7 +103,7 @@ func (m *translatePaths) translateNotebookPath(literal, localPath, remotePath st return "", fmt.Errorf("unable to determine if %s is a notebook: %w", localPath, err) } if !nb { - return "", fmt.Errorf("file at %s is not a notebook", localPath) + return "", ErrIsNotNotebook{localPath} } // Upon import, notebooks are stripped of their extension. @@ -94,14 +111,16 @@ func (m *translatePaths) translateNotebookPath(literal, localPath, remotePath st } func (m *translatePaths) translateFilePath(literal, localPath, remotePath string) (string, error) { - _, err := os.Stat(localPath) + nb, _, err := notebook.Detect(localPath) if os.IsNotExist(err) { return "", fmt.Errorf("file %s not found", literal) } if err != nil { - return "", fmt.Errorf("unable to access %s: %w", localPath, err) + return "", fmt.Errorf("unable to determine if %s is not a notebook: %w", localPath, err) + } + if nb { + return "", ErrIsNotebook{localPath} } - return remotePath, nil } @@ -110,6 +129,9 @@ func (m *translatePaths) translateJobTask(dir string, b *bundle.Bundle, task *jo if task.NotebookTask != nil { err = m.rewritePath(dir, b, &task.NotebookTask.NotebookPath, m.translateNotebookPath) + if target := (&ErrIsNotNotebook{}); errors.As(err, target) { + return fmt.Errorf(`expected a notebook for "tasks.notebook_task.notebook_path" but got a file: %w`, target) + } if err != nil { return err } @@ -117,6 +139,9 @@ func (m *translatePaths) translateJobTask(dir string, b *bundle.Bundle, task *jo if task.SparkPythonTask != nil { err = m.rewritePath(dir, b, &task.SparkPythonTask.PythonFile, m.translateFilePath) + if target := (&ErrIsNotebook{}); errors.As(err, target) { + return fmt.Errorf(`expected a file for "tasks.spark_python_task.python_file" but got a notebook: %w`, target) + } if err != nil { return err } @@ -130,6 +155,9 @@ func (m *translatePaths) translatePipelineLibrary(dir string, b *bundle.Bundle, if library.Notebook != nil { err = m.rewritePath(dir, b, &library.Notebook.Path, m.translateNotebookPath) + if target := (&ErrIsNotNotebook{}); errors.As(err, target) { + return fmt.Errorf(`expected a notebook for "libraries.notebook.path" but got a file: %w`, target) + } if err != nil { return err } @@ -137,6 +165,9 @@ func (m *translatePaths) translatePipelineLibrary(dir string, b *bundle.Bundle, if library.File != nil { err = m.rewritePath(dir, b, &library.File.Path, m.translateFilePath) + if target := (&ErrIsNotebook{}); errors.As(err, target) { + return fmt.Errorf(`expected a file for "libraries.file.path" but got a notebook: %w`, 
target) + } if err != nil { return err } diff --git a/bundle/config/mutator/translate_paths_test.go b/bundle/config/mutator/translate_paths_test.go index 1bcb8b1b2..b87f4f676 100644 --- a/bundle/config/mutator/translate_paths_test.go +++ b/bundle/config/mutator/translate_paths_test.go @@ -455,3 +455,143 @@ func TestPipelineFileDoesNotExistError(t *testing.T) { err := mutator.TranslatePaths().Apply(context.Background(), bundle) assert.EqualError(t, err, "file ./doesnt_exist.py not found") } + +func TestJobSparkPythonTaskWithNotebookSourceError(t *testing.T) { + dir := t.TempDir() + touchNotebookFile(t, filepath.Join(dir, "my_notebook.py")) + + bundle := &bundle.Bundle{ + Config: config.Root{ + Path: dir, + Workspace: config.Workspace{ + FilesPath: "/bundle", + }, + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "job": { + Paths: resources.Paths{ + ConfigFilePath: filepath.Join(dir, "resource.yml"), + }, + JobSettings: &jobs.JobSettings{ + Tasks: []jobs.Task{ + { + SparkPythonTask: &jobs.SparkPythonTask{ + PythonFile: "./my_notebook.py", + }, + }, + }, + }, + }, + }, + }, + }, + } + + err := mutator.TranslatePaths().Apply(context.Background(), bundle) + assert.ErrorContains(t, err, `expected a file for "tasks.spark_python_task.python_file" but got a notebook`) +} + +func TestJobNotebookTaskWithFileSourceError(t *testing.T) { + dir := t.TempDir() + touchEmptyFile(t, filepath.Join(dir, "my_file.py")) + + bundle := &bundle.Bundle{ + Config: config.Root{ + Path: dir, + Workspace: config.Workspace{ + FilesPath: "/bundle", + }, + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "job": { + Paths: resources.Paths{ + ConfigFilePath: filepath.Join(dir, "resource.yml"), + }, + JobSettings: &jobs.JobSettings{ + Tasks: []jobs.Task{ + { + NotebookTask: &jobs.NotebookTask{ + NotebookPath: "./my_file.py", + }, + }, + }, + }, + }, + }, + }, + }, + } + + err := mutator.TranslatePaths().Apply(context.Background(), bundle) + assert.ErrorContains(t, err, `expected a notebook for "tasks.notebook_task.notebook_path" but got a file`) +} + +func TestPipelineNotebookLibraryWithFileSourceError(t *testing.T) { + dir := t.TempDir() + touchEmptyFile(t, filepath.Join(dir, "my_file.py")) + + bundle := &bundle.Bundle{ + Config: config.Root{ + Path: dir, + Workspace: config.Workspace{ + FilesPath: "/bundle", + }, + Resources: config.Resources{ + Pipelines: map[string]*resources.Pipeline{ + "pipeline": { + Paths: resources.Paths{ + ConfigFilePath: filepath.Join(dir, "resource.yml"), + }, + PipelineSpec: &pipelines.PipelineSpec{ + Libraries: []pipelines.PipelineLibrary{ + { + Notebook: &pipelines.NotebookLibrary{ + Path: "./my_file.py", + }, + }, + }, + }, + }, + }, + }, + }, + } + + err := mutator.TranslatePaths().Apply(context.Background(), bundle) + assert.ErrorContains(t, err, `expected a notebook for "libraries.notebook.path" but got a file`) +} + +func TestPipelineFileLibraryWithNotebookSourceError(t *testing.T) { + dir := t.TempDir() + touchNotebookFile(t, filepath.Join(dir, "my_notebook.py")) + + bundle := &bundle.Bundle{ + Config: config.Root{ + Path: dir, + Workspace: config.Workspace{ + FilesPath: "/bundle", + }, + Resources: config.Resources{ + Pipelines: map[string]*resources.Pipeline{ + "pipeline": { + Paths: resources.Paths{ + ConfigFilePath: filepath.Join(dir, "resource.yml"), + }, + PipelineSpec: &pipelines.PipelineSpec{ + Libraries: []pipelines.PipelineLibrary{ + { + File: &pipelines.FileLibrary{ + Path: "./my_notebook.py", + }, + }, + }, + }, + }, + }, + }, + }, + } + + 
err := mutator.TranslatePaths().Apply(context.Background(), bundle) + assert.ErrorContains(t, err, `expected a file for "libraries.file.path" but got a notebook`) +} From 9a0888126c8a30433a1d40d710e1162bc02a7bd2 Mon Sep 17 00:00:00 2001 From: Miles Yucht Date: Wed, 12 Jul 2023 17:36:09 +0200 Subject: [PATCH 18/18] Improve auth login experience (#570) ## Changes Currently, `databricks auth login` is difficult to use. If a user types this command in, the command fails with ``` Error: init: cannot fetch credentials ``` after prompting for a profile name. To make this experience smoother, this change ensures that the host, and if necessary, the account ID, are prompted for input from the user if they aren't provided on the CLI. ## Tests Manual tests: ``` $ ./cli auth token Databricks Host: https://.staging.cloud.databricks.com { "access_token": "...", "token_type": "Bearer", "expiry": "2023-07-11T12:56:59.929671+02:00" } $ ./cli auth login Databricks Host: https://.staging.cloud.databricks.com Databricks Profile Name: -test Profile -test was successfully saved $ ./cli auth login Databricks Host: https://accounts.cloud.databricks.com Databricks Account ID: Databricks Profile Name: ACCOUNT--test Profile ACCOUNT--test was successfully saved ``` --------- Co-authored-by: Pieter Noordhuis --- cmd/auth/auth.go | 35 ++++++++++++++++++++++++++++++--- cmd/auth/login.go | 50 ++++++++++++++++++++++++++++++++++------------- cmd/auth/token.go | 12 +++++++----- 3 files changed, 75 insertions(+), 22 deletions(-) diff --git a/cmd/auth/auth.go b/cmd/auth/auth.go index 3efaca572..b7e8d2d78 100644 --- a/cmd/auth/auth.go +++ b/cmd/auth/auth.go @@ -1,8 +1,11 @@ package auth import ( + "context" + "github.com/databricks/cli/cmd/root" "github.com/databricks/cli/libs/auth" + "github.com/databricks/cli/libs/cmdio" "github.com/spf13/cobra" ) @@ -11,10 +14,36 @@ var authCmd = &cobra.Command{ Short: "Authentication related commands", } -var perisistentAuth auth.PersistentAuth +var persistentAuth auth.PersistentAuth + +func promptForHost(ctx context.Context) (string, error) { + prompt := cmdio.Prompt(ctx) + prompt.Label = "Databricks Host" + prompt.Default = "https://" + prompt.AllowEdit = true + // Validate? + host, err := prompt.Run() + if err != nil { + return "", err + } + return host, nil +} + +func promptForAccountID(ctx context.Context) (string, error) { + prompt := cmdio.Prompt(ctx) + prompt.Label = "Databricks Account ID" + prompt.Default = "" + prompt.AllowEdit = true + // Validate? 
+ accountId, err := prompt.Run() + if err != nil { + return "", err + } + return accountId, nil +} func init() { root.RootCmd.AddCommand(authCmd) - authCmd.PersistentFlags().StringVar(&perisistentAuth.Host, "host", perisistentAuth.Host, "Databricks Host") - authCmd.PersistentFlags().StringVar(&perisistentAuth.AccountID, "account-id", perisistentAuth.AccountID, "Databricks Account ID") + authCmd.PersistentFlags().StringVar(&persistentAuth.Host, "host", persistentAuth.Host, "Databricks Host") + authCmd.PersistentFlags().StringVar(&persistentAuth.AccountID, "account-id", persistentAuth.AccountID, "Databricks Account ID") } diff --git a/cmd/auth/login.go b/cmd/auth/login.go index 6b708e957..37d44c084 100644 --- a/cmd/auth/login.go +++ b/cmd/auth/login.go @@ -17,16 +17,46 @@ import ( var loginTimeout time.Duration var configureCluster bool +func configureHost(ctx context.Context, args []string, argIndex int) error { + if len(args) > argIndex { + persistentAuth.Host = args[argIndex] + return nil + } + + host, err := promptForHost(ctx) + if err != nil { + return err + } + persistentAuth.Host = host + return nil +} + var loginCmd = &cobra.Command{ Use: "login [HOST]", Short: "Authenticate this machine", RunE: func(cmd *cobra.Command, args []string) error { - if perisistentAuth.Host == "" && len(args) == 1 { - perisistentAuth.Host = args[0] + ctx := cmd.Context() + if persistentAuth.Host == "" { + configureHost(ctx, args, 0) } + defer persistentAuth.Close() - defer perisistentAuth.Close() - ctx, cancel := context.WithTimeout(cmd.Context(), loginTimeout) + // We need the config without the profile before it's used to initialise new workspace client below. + // Otherwise it will complain about non existing profile because it was not yet saved. + cfg := config.Config{ + Host: persistentAuth.Host, + AuthType: "databricks-cli", + } + if cfg.IsAccountClient() && persistentAuth.AccountID == "" { + accountId, err := promptForAccountID(ctx) + if err != nil { + return err + } + persistentAuth.AccountID = accountId + } + cfg.AccountID = persistentAuth.AccountID + + ctx, cancel := context.WithTimeout(ctx, loginTimeout) defer cancel() var profileName string @@ -36,7 +66,7 @@ var loginCmd = &cobra.Command{ } else { prompt := cmdio.Prompt(ctx) prompt.Label = "Databricks Profile Name" - prompt.Default = perisistentAuth.ProfileName() + prompt.Default = persistentAuth.ProfileName() prompt.AllowEdit = true profile, err := prompt.Run() if err != nil { @@ -44,19 +74,11 @@ var loginCmd = &cobra.Command{ } profileName = profile } - err := perisistentAuth.Challenge(ctx) + err := persistentAuth.Challenge(ctx) if err != nil { return err } - // We need the config without the profile before it's used to initialise new workspace client below. - // Otherwise it will complain about non existing profile because it was not yet saved. 
- cfg := config.Config{ - Host: perisistentAuth.Host, - AccountID: perisistentAuth.AccountID, - AuthType: "databricks-cli", - } - if configureCluster { w, err := databricks.NewWorkspaceClient((*databricks.Config)(&cfg)) if err != nil { diff --git a/cmd/auth/token.go b/cmd/auth/token.go index f2754fa69..1b8d8b131 100644 --- a/cmd/auth/token.go +++ b/cmd/auth/token.go @@ -15,13 +15,15 @@ var tokenCmd = &cobra.Command{ Use: "token [HOST]", Short: "Get authentication token", RunE: func(cmd *cobra.Command, args []string) error { - if perisistentAuth.Host == "" && len(args) == 1 { - perisistentAuth.Host = args[0] + ctx := cmd.Context() + if persistentAuth.Host == "" { + configureHost(ctx, args, 0) } - defer perisistentAuth.Close() - ctx, cancel := context.WithTimeout(cmd.Context(), tokenTimeout) + defer persistentAuth.Close() + + ctx, cancel := context.WithTimeout(ctx, tokenTimeout) defer cancel() - t, err := perisistentAuth.Load(ctx) + t, err := persistentAuth.Load(ctx) if err != nil { return err }