From 807a37b36ab621802bffafd5ff3a7074404e9363 Mon Sep 17 00:00:00 2001 From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com> Date: Mon, 3 Mar 2025 20:28:43 +0530 Subject: [PATCH 1/8] Add the `auth.EnvVars` function (#2403) ## Changes This PR adds the auth.EnvVars function, which is a list of all environment variables that the SDK uses to read auth configuration. This is useful for spawning subprocesses since you can unset all auth environment variables to clean up the environment before configuring the auth. It's used in #2278 today and will also be useful for the `databricks bundle exec` command. ## Tests Unit test. --- libs/auth/env.go | 19 +++++++++++++++++ libs/auth/env_test.go | 48 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) diff --git a/libs/auth/env.go b/libs/auth/env.go index 5c0d21292..7413662b9 100644 --- a/libs/auth/env.go +++ b/libs/auth/env.go @@ -38,3 +38,22 @@ func GetEnvFor(name string) (string, bool) { return "", false } + +// EnvVars returns the list of environment variables that the SDK reads to configure +// authentication. +// This is useful for spawning subprocesses since you can unset all auth environment +// variables to clean up the environment before configuring authentication for the +// child process. +func EnvVars() []string { + out := []string{} + + for _, attr := range config.ConfigAttributes { + if len(attr.EnvVars) == 0 { + continue + } + + out = append(out, attr.EnvVars...) + } + + return out +} diff --git a/libs/auth/env_test.go b/libs/auth/env_test.go index 850110b60..38dc1c6b7 100644 --- a/libs/auth/env_test.go +++ b/libs/auth/env_test.go @@ -79,3 +79,51 @@ func TestGetEnvFor(t *testing.T) { assert.False(t, ok) assert.Empty(t, out) } + +func TestAuthEnvVars(t *testing.T) { + // Few common environment variables that we expect the SDK to support. + contains := []string{ + // Generic attributes. 
+ "DATABRICKS_HOST", + "DATABRICKS_CONFIG_PROFILE", + "DATABRICKS_AUTH_TYPE", + "DATABRICKS_METADATA_SERVICE_URL", + "DATABRICKS_CONFIG_FILE", + + // OAuth specific attributes. + "DATABRICKS_CLIENT_ID", + "DATABRICKS_CLIENT_SECRET", + "DATABRICKS_CLI_PATH", + + // Google specific attributes. + "DATABRICKS_GOOGLE_SERVICE_ACCOUNT", + "GOOGLE_CREDENTIALS", + + // Personal access token specific attributes. + "DATABRICKS_TOKEN", + + // Databricks password specific attributes. + "DATABRICKS_USERNAME", + "DATABRICKS_PASSWORD", + + // Account authentication attributes. + "DATABRICKS_ACCOUNT_ID", + + // Azure attributes + "DATABRICKS_AZURE_RESOURCE_ID", + "ARM_USE_MSI", + "ARM_CLIENT_SECRET", + "ARM_CLIENT_ID", + "ARM_TENANT_ID", + "ARM_ENVIRONMENT", + + // Github attributes + "ACTIONS_ID_TOKEN_REQUEST_URL", + "ACTIONS_ID_TOKEN_REQUEST_TOKEN", + } + + out := EnvVars() + for _, v := range contains { + assert.Contains(t, out, v) + } +} From 3b07265113061b0827ab639173882f98025edee2 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Mon, 3 Mar 2025 15:34:41 +0000 Subject: [PATCH 2/8] Restrict same name libraries check for only whl and jar types (#2401) ## Changes Same name libraries check only valid for local libraries. Local libraries are only supported for Whl and Jar types. Hence we can restrict matching pattern only to these libraries. 
## Tests Existing acceptance tests pass --- bundle/libraries/same_name_libraries.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/bundle/libraries/same_name_libraries.go b/bundle/libraries/same_name_libraries.go index ab869d3d2..907843193 100644 --- a/bundle/libraries/same_name_libraries.go +++ b/bundle/libraries/same_name_libraries.go @@ -13,8 +13,10 @@ import ( type checkForSameNameLibraries struct{} var patterns = []dyn.Pattern{ - taskLibrariesPattern.Append(dyn.AnyIndex(), dyn.AnyKey()), - forEachTaskLibrariesPattern.Append(dyn.AnyIndex(), dyn.AnyKey()), + taskLibrariesPattern.Append(dyn.AnyIndex(), dyn.Key("whl")), + taskLibrariesPattern.Append(dyn.AnyIndex(), dyn.Key("jar")), + forEachTaskLibrariesPattern.Append(dyn.AnyIndex(), dyn.Key("whl")), + forEachTaskLibrariesPattern.Append(dyn.AnyIndex(), dyn.Key("jar")), envDepsPattern.Append(dyn.AnyIndex()), } From 8b7e0ba6837b056caf986b605f3cf06e0d879d0a Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Mon, 3 Mar 2025 15:53:58 +0000 Subject: [PATCH 3/8] test: Lock setuptools version in TestDefaultPython test (#2421) ## Changes Lock setuptools version to 75.8.2 (latest as of March 3, 2025) ## Why As part of the tests `uv install` was installing latest version of setuptools which led to all tests started to fail on Feb 25 when 75.8.1 setuptools version was released and which changed the naming of the output built artifacts https://setuptools.pypa.io/en/stable/history.html#v75-8-1 This change prevents us from breakages like this. 
## Tests Existing tests pass --- integration/bundle/init_default_python_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/integration/bundle/init_default_python_test.go b/integration/bundle/init_default_python_test.go index ca66491ab..9ac8f47bd 100644 --- a/integration/bundle/init_default_python_test.go +++ b/integration/bundle/init_default_python_test.go @@ -31,8 +31,8 @@ var pythonVersionsShort = []string{ } var extraInstalls = map[string][]string{ - "3.12": {"setuptools"}, - "3.13": {"setuptools"}, + "3.12": {"setuptools==75.8.2"}, + "3.13": {"setuptools==75.8.2"}, } func TestDefaultPython(t *testing.T) { From 010f88f84efb9c356fae9f4d69e9aec5f52e7204 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Mon, 3 Mar 2025 16:40:28 +0000 Subject: [PATCH 4/8] Added a warning when `config` section is used in apps (#2416) ## Changes Added a warning when `config` section is used in apps ## Why To avoid the confusion between using apps in DABs and outside of DABs, we want to provide only one way of configuring apps runtime configuration - by using `app.yml` file in the root of the app. 
## Tests Added acceptance tests --- acceptance/bundle/apps/app_yaml/app/app.py | 0 acceptance/bundle/apps/app_yaml/app/app.yml | 3 ++ .../bundle/apps/app_yaml/databricks.yml | 8 ++++ .../bundle/apps/app_yaml/out.app.yml.txt | 5 +++ acceptance/bundle/apps/app_yaml/output.txt | 15 +++++++ acceptance/bundle/apps/app_yaml/script | 4 ++ .../bundle/apps/config_section/app/app.py | 1 + .../bundle/apps/config_section/databricks.yml | 12 ++++++ .../apps/config_section/out.app.yml.txt | 5 +++ .../bundle/apps/config_section/output.txt | 23 +++++++++++ acceptance/bundle/apps/config_section/script | 4 ++ acceptance/bundle/apps/test.toml | 26 ++++++++++++ bundle/apps/validate.go | 19 +++------ bundle/apps/validate_test.go | 40 ------------------- .../bundle/testdata/apps/bundle_deploy.txt | 4 ++ .../bundle/testdata/apps/bundle_validate.txt | 6 ++- 16 files changed, 121 insertions(+), 54 deletions(-) create mode 100644 acceptance/bundle/apps/app_yaml/app/app.py create mode 100644 acceptance/bundle/apps/app_yaml/app/app.yml create mode 100644 acceptance/bundle/apps/app_yaml/databricks.yml create mode 100644 acceptance/bundle/apps/app_yaml/out.app.yml.txt create mode 100644 acceptance/bundle/apps/app_yaml/output.txt create mode 100644 acceptance/bundle/apps/app_yaml/script create mode 100644 acceptance/bundle/apps/config_section/app/app.py create mode 100644 acceptance/bundle/apps/config_section/databricks.yml create mode 100644 acceptance/bundle/apps/config_section/out.app.yml.txt create mode 100644 acceptance/bundle/apps/config_section/output.txt create mode 100644 acceptance/bundle/apps/config_section/script create mode 100644 acceptance/bundle/apps/test.toml diff --git a/acceptance/bundle/apps/app_yaml/app/app.py b/acceptance/bundle/apps/app_yaml/app/app.py new file mode 100644 index 000000000..e69de29bb diff --git a/acceptance/bundle/apps/app_yaml/app/app.yml b/acceptance/bundle/apps/app_yaml/app/app.yml new file mode 100644 index 000000000..61471358d --- /dev/null +++ 
b/acceptance/bundle/apps/app_yaml/app/app.yml @@ -0,0 +1,3 @@ +command: + - python + - app.py diff --git a/acceptance/bundle/apps/app_yaml/databricks.yml b/acceptance/bundle/apps/app_yaml/databricks.yml new file mode 100644 index 000000000..0064e6c6b --- /dev/null +++ b/acceptance/bundle/apps/app_yaml/databricks.yml @@ -0,0 +1,8 @@ +bundle: + name: apps_yaml + +resources: + apps: + myapp: + name: myapp + source_code_path: ./app diff --git a/acceptance/bundle/apps/app_yaml/out.app.yml.txt b/acceptance/bundle/apps/app_yaml/out.app.yml.txt new file mode 100644 index 000000000..eccd4eb13 --- /dev/null +++ b/acceptance/bundle/apps/app_yaml/out.app.yml.txt @@ -0,0 +1,5 @@ +{ + "method": "POST", + "path": "/api/2.0/workspace-files/import-file/Workspace/Users/[USERNAME]/.bundle/apps_yaml/default/files/app/app.yml", + "raw_body": "command:\n - python\n - app.py\n" +} diff --git a/acceptance/bundle/apps/app_yaml/output.txt b/acceptance/bundle/apps/app_yaml/output.txt new file mode 100644 index 000000000..2a946e5ee --- /dev/null +++ b/acceptance/bundle/apps/app_yaml/output.txt @@ -0,0 +1,15 @@ + +>>> [CLI] bundle validate +Name: apps_yaml +Target: default +Workspace: + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/apps_yaml/default + +Validation OK! + +>>> [CLI] bundle deploy +Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/apps_yaml/default/files... +Deploying resources... +Updating deployment state... +Deployment complete! 
diff --git a/acceptance/bundle/apps/app_yaml/script b/acceptance/bundle/apps/app_yaml/script new file mode 100644 index 000000000..8cb34c62d --- /dev/null +++ b/acceptance/bundle/apps/app_yaml/script @@ -0,0 +1,4 @@ +trace $CLI bundle validate +trace $CLI bundle deploy +jq 'select(.path == "/api/2.0/workspace-files/import-file/Workspace/Users/[USERNAME]/.bundle/apps_yaml/default/files/app/app.yml")' out.requests.txt | sed 's/\\r//g' > out.app.yml.txt +rm out.requests.txt diff --git a/acceptance/bundle/apps/config_section/app/app.py b/acceptance/bundle/apps/config_section/app/app.py new file mode 100644 index 000000000..f1a18139c --- /dev/null +++ b/acceptance/bundle/apps/config_section/app/app.py @@ -0,0 +1 @@ +print("Hello world!") diff --git a/acceptance/bundle/apps/config_section/databricks.yml b/acceptance/bundle/apps/config_section/databricks.yml new file mode 100644 index 000000000..25ab2f261 --- /dev/null +++ b/acceptance/bundle/apps/config_section/databricks.yml @@ -0,0 +1,12 @@ +bundle: + name: apps_config_section + +resources: + apps: + myapp: + name: myapp + source_code_path: ./app + config: + command: + - python + - app.py diff --git a/acceptance/bundle/apps/config_section/out.app.yml.txt b/acceptance/bundle/apps/config_section/out.app.yml.txt new file mode 100644 index 000000000..a3e69351b --- /dev/null +++ b/acceptance/bundle/apps/config_section/out.app.yml.txt @@ -0,0 +1,5 @@ +{ + "method": "POST", + "path": "/api/2.0/workspace-files/import-file/Workspace/Users/[USERNAME]/.bundle/apps_config_section/default/files/app/app.yml", + "raw_body": "command:\n - python\n - app.py\n" +} diff --git a/acceptance/bundle/apps/config_section/output.txt b/acceptance/bundle/apps/config_section/output.txt new file mode 100644 index 000000000..800945278 --- /dev/null +++ b/acceptance/bundle/apps/config_section/output.txt @@ -0,0 +1,23 @@ + +>>> [CLI] bundle validate +Warning: App config section detected + +remove 'config' from app resource 'myapp' section and use 
app.yml file in the root of this app instead + +Name: apps_config_section +Target: default +Workspace: + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/apps_config_section/default + +Found 1 warning + +>>> [CLI] bundle deploy +Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/apps_config_section/default/files... +Deploying resources... +Updating deployment state... +Deployment complete! +Warning: App config section detected + +remove 'config' from app resource 'myapp' section and use app.yml file in the root of this app instead + diff --git a/acceptance/bundle/apps/config_section/script b/acceptance/bundle/apps/config_section/script new file mode 100644 index 000000000..5769918be --- /dev/null +++ b/acceptance/bundle/apps/config_section/script @@ -0,0 +1,4 @@ +trace $CLI bundle validate +trace $CLI bundle deploy +jq 'select(.path == "/api/2.0/workspace-files/import-file/Workspace/Users/[USERNAME]/.bundle/apps_config_section/default/files/app/app.yml")' out.requests.txt > out.app.yml.txt +rm out.requests.txt diff --git a/acceptance/bundle/apps/test.toml b/acceptance/bundle/apps/test.toml new file mode 100644 index 000000000..6999f2089 --- /dev/null +++ b/acceptance/bundle/apps/test.toml @@ -0,0 +1,26 @@ +Cloud = false +RecordRequests = true + +Ignore = [ + '.databricks', +] + +[[Server]] +Pattern = "POST /api/2.0/apps" + +[[Server]] +Pattern = "GET /api/2.0/apps/myapp" +Response.Body = ''' +{ + "name": "myapp", + "description": "", + "compute_status": { + "state": "ACTIVE", + "message": "App compute is active." + }, + "app_status": { + "state": "RUNNING", + "message": "Application is running." 
+ } +} +''' diff --git a/bundle/apps/validate.go b/bundle/apps/validate.go index fc50aeafc..7a4fe7126 100644 --- a/bundle/apps/validate.go +++ b/bundle/apps/validate.go @@ -3,8 +3,6 @@ package apps import ( "context" "fmt" - "path" - "strings" "github.com/databricks/cli/bundle" "github.com/databricks/cli/libs/diag" @@ -14,7 +12,6 @@ type validate struct{} func (v *validate) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { var diags diag.Diagnostics - possibleConfigFiles := []string{"app.yml", "app.yaml"} usedSourceCodePaths := make(map[string]string) for key, app := range b.Config.Resources.Apps { @@ -28,16 +25,12 @@ func (v *validate) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics } usedSourceCodePaths[app.SourceCodePath] = key - for _, configFile := range possibleConfigFiles { - appPath := strings.TrimPrefix(app.SourceCodePath, b.Config.Workspace.FilePath) - cf := path.Join(appPath, configFile) - if _, err := b.SyncRoot.Stat(cf); err == nil { - diags = append(diags, diag.Diagnostic{ - Severity: diag.Error, - Summary: configFile + " detected", - Detail: fmt.Sprintf("remove %s and use 'config' property for app resource '%s' instead", cf, app.Name), - }) - } + if app.Config != nil { + diags = append(diags, diag.Diagnostic{ + Severity: diag.Warning, + Summary: "App config section detected", + Detail: fmt.Sprintf("remove 'config' from app resource '%s' section and use app.yml file in the root of this app instead", key), + }) } } diff --git a/bundle/apps/validate_test.go b/bundle/apps/validate_test.go index 11270198e..d218f96ca 100644 --- a/bundle/apps/validate_test.go +++ b/bundle/apps/validate_test.go @@ -17,46 +17,6 @@ import ( "github.com/stretchr/testify/require" ) -func TestAppsValidate(t *testing.T) { - tmpDir := t.TempDir() - testutil.Touch(t, tmpDir, "app1", "app.yml") - testutil.Touch(t, tmpDir, "app2", "app.py") - - b := &bundle.Bundle{ - BundleRootPath: tmpDir, - SyncRootPath: tmpDir, - SyncRoot: vfs.MustNew(tmpDir), - Config: 
config.Root{ - Workspace: config.Workspace{ - FilePath: "/foo/bar/", - }, - Resources: config.Resources{ - Apps: map[string]*resources.App{ - "app1": { - App: &apps.App{ - Name: "app1", - }, - SourceCodePath: "./app1", - }, - "app2": { - App: &apps.App{ - Name: "app2", - }, - SourceCodePath: "./app2", - }, - }, - }, - }, - } - - bundletest.SetLocation(b, ".", []dyn.Location{{File: filepath.Join(tmpDir, "databricks.yml")}}) - - diags := bundle.ApplySeq(context.Background(), b, mutator.TranslatePaths(), Validate()) - require.Len(t, diags, 1) - require.Equal(t, "app.yml detected", diags[0].Summary) - require.Contains(t, diags[0].Detail, "app.yml and use 'config' property for app resource") -} - func TestAppsValidateSameSourcePath(t *testing.T) { tmpDir := t.TempDir() testutil.Touch(t, tmpDir, "app1", "app.py") diff --git a/integration/bundle/testdata/apps/bundle_deploy.txt b/integration/bundle/testdata/apps/bundle_deploy.txt index 437a55596..55b8dedc6 100644 --- a/integration/bundle/testdata/apps/bundle_deploy.txt +++ b/integration/bundle/testdata/apps/bundle_deploy.txt @@ -2,3 +2,7 @@ Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/$UNIQUE_PRJ/files. Deploying resources... Updating deployment state... Deployment complete! +Warning: App config section detected + +remove 'config' from app resource 'test_app' section and use app.yml file in the root of this app instead + diff --git a/integration/bundle/testdata/apps/bundle_validate.txt b/integration/bundle/testdata/apps/bundle_validate.txt index 567fafd24..cb37fc4bd 100644 --- a/integration/bundle/testdata/apps/bundle_validate.txt +++ b/integration/bundle/testdata/apps/bundle_validate.txt @@ -1,7 +1,11 @@ +Warning: App config section detected + +remove 'config' from app resource 'test_app' section and use app.yml file in the root of this app instead + Name: basic Target: default Workspace: User: [USERNAME] Path: /Workspace/Users/[USERNAME]/.bundle/$UNIQUE_PRJ -Validation OK! 
+Found 1 warning From bcce6b0ece7c7b5eece18d2c712246eb36cf6c97 Mon Sep 17 00:00:00 2001 From: Anton Nekipelov <226657+anton-107@users.noreply.github.com> Date: Tue, 4 Mar 2025 12:43:26 +0100 Subject: [PATCH 5/8] Enable TestAuthDescribeFailure integration test (#2420) ## Changes 1. remove t.Skip directive from TestAuthDescribeFailure integration test 2. remove checking the host in the test ## Why 1. This enables integration test that exercises `databricks auth describe` command, which was previously throwing errors in the CI/CD pipeline. For the nonexistent profile check to fail, we need to create a new profile file for the CLI to check (otherwise the configuration skips checking of the provided profile actually exists). 2. I removed the assertion for the host part because it is not relevant for the test, and setting that up just required more code in the test setup. ## Tests The integration test is passing --- integration/cmd/auth/describe_test.go | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/integration/cmd/auth/describe_test.go b/integration/cmd/auth/describe_test.go index 66b83b557..3d0a9b17d 100644 --- a/integration/cmd/auth/describe_test.go +++ b/integration/cmd/auth/describe_test.go @@ -2,10 +2,15 @@ package auth_test import ( "context" + "path/filepath" "regexp" "strings" "testing" + "github.com/databricks/cli/internal/testutil" + "github.com/databricks/cli/libs/databrickscfg" + "github.com/databricks/databricks-sdk-go/config" + "github.com/databricks/cli/internal/testcli" "github.com/databricks/databricks-sdk-go" "github.com/stretchr/testify/require" @@ -34,8 +39,19 @@ func TestAuthDescribeSuccess(t *testing.T) { } func TestAuthDescribeFailure(t *testing.T) { - t.Skipf("Skipping because of https://github.com/databricks/cli/issues/2010") + testutil.CleanupEnvironment(t) + // set up a custom config file: + home := t.TempDir() + cfg := &config.Config{ + ConfigFile: filepath.Join(home, "customcfg"), + Profile: 
"profile1", + } + err := databrickscfg.SaveToProfile(context.Background(), cfg) + require.NoError(t, err) + t.Setenv("DATABRICKS_CONFIG_FILE", filepath.Join(home, "customcfg")) + + // run the command: ctx := context.Background() stdout, _ := testcli.RequireSuccessfulRun(t, ctx, "auth", "describe", "--profile", "nonexistent") outStr := stdout.String() @@ -44,10 +60,5 @@ func TestAuthDescribeFailure(t *testing.T) { require.Contains(t, outStr, "Unable to authenticate: resolve") require.Contains(t, outStr, "has no nonexistent profile configured") require.Contains(t, outStr, "Current configuration:") - - w, err := databricks.NewWorkspaceClient(&databricks.Config{}) - require.NoError(t, err) - - require.Contains(t, outStr, "✓ host: "+w.Config.Host) require.Contains(t, outStr, "✓ profile: nonexistent (from --profile flag)") } From c0f5436a28778d89896685b29e83484a4341cee3 Mon Sep 17 00:00:00 2001 From: Anton Nekipelov <226657+anton-107@users.noreply.github.com> Date: Tue, 4 Mar 2025 13:46:39 +0100 Subject: [PATCH 6/8] Add support for schemas in deployment bind/unbind commands (#2406) ## Changes 1. Changed `FindResourceByConfigKey` to return schema resources 2. Implemented the `Exists` method for schema resources ## Why This PR adds support for schema resources in deployment operations, enabling users to: - Bind schemas using `databricks bundle deployment bind ` - Unbind schemas using `databricks bundle deployment unbind ` Where: - `myschema_key` is a resource key defined in the bundle's .yml file - `schema_full_name` references an existing schema in the Databricks workspace These capabilities allow for more flexible resource management of schemas within bundles. ## Tests Added a new integration test that tests bind and unbind methods together with bundle deployment and destruction. 
--- bundle/config/resources.go | 6 +++ bundle/config/resources/schema.go | 23 +++++++- bundle/config/resources/schema_test.go | 26 +++++++++ integration/bundle/bind_resource_test.go | 54 +++++++++++++++++++ .../databricks_template_schema.json | 8 +++ .../template/databricks.yml.tmpl | 13 +++++ 6 files changed, 128 insertions(+), 2 deletions(-) create mode 100644 bundle/config/resources/schema_test.go create mode 100644 integration/bundle/bundles/uc_schema_only/databricks_template_schema.json create mode 100644 integration/bundle/bundles/uc_schema_only/template/databricks.yml.tmpl diff --git a/bundle/config/resources.go b/bundle/config/resources.go index 1f523fed3..866bd7641 100644 --- a/bundle/config/resources.go +++ b/bundle/config/resources.go @@ -112,6 +112,12 @@ func (r *Resources) FindResourceByConfigKey(key string) (ConfigResource, error) } } + for k := range r.Schemas { + if k == key { + found = append(found, r.Schemas[k]) + } + } + if len(found) == 0 { return nil, fmt.Errorf("no such resource: %s", key) } diff --git a/bundle/config/resources/schema.go b/bundle/config/resources/schema.go index b638907ac..d9849fd2d 100644 --- a/bundle/config/resources/schema.go +++ b/bundle/config/resources/schema.go @@ -6,6 +6,10 @@ import ( "net/url" "strings" + "github.com/databricks/databricks-sdk-go/apierr" + + "github.com/databricks/cli/libs/log" + "github.com/databricks/databricks-sdk-go" "github.com/databricks/databricks-sdk-go/marshal" "github.com/databricks/databricks-sdk-go/service/catalog" @@ -25,8 +29,23 @@ type Schema struct { URL string `json:"url,omitempty" bundle:"internal"` } -func (s *Schema) Exists(ctx context.Context, w *databricks.WorkspaceClient, id string) (bool, error) { - return false, errors.New("schema.Exists() is not supported") +func (s *Schema) Exists(ctx context.Context, w *databricks.WorkspaceClient, fullName string) (bool, error) { + log.Tracef(ctx, "Checking if schema with fullName=%s exists", fullName) + + _, err := 
w.Schemas.GetByFullName(ctx, fullName) + if err != nil { + log.Debugf(ctx, "schema with full name %s does not exist: %v", fullName, err) + + var aerr *apierr.APIError + if errors.As(err, &aerr) { + if aerr.StatusCode == 404 { + return false, nil + } + } + + return false, err + } + return true, nil } func (s *Schema) TerraformResourceName() string { diff --git a/bundle/config/resources/schema_test.go b/bundle/config/resources/schema_test.go new file mode 100644 index 000000000..b609b6565 --- /dev/null +++ b/bundle/config/resources/schema_test.go @@ -0,0 +1,26 @@ +package resources + +import ( + "context" + "testing" + + "github.com/databricks/databricks-sdk-go/apierr" + "github.com/databricks/databricks-sdk-go/experimental/mocks" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" +) + +func TestSchemaNotFound(t *testing.T) { + ctx := context.Background() + + m := mocks.NewMockWorkspaceClient(t) + m.GetMockSchemasAPI().On("GetByFullName", mock.Anything, "non-existent-schema").Return(nil, &apierr.APIError{ + StatusCode: 404, + }) + + s := &Schema{} + exists, err := s.Exists(ctx, m.WorkspaceClient, "non-existent-schema") + + require.Falsef(t, exists, "Exists should return false when getting a 404 response from Workspace") + require.NoErrorf(t, err, "Exists should not return an error when getting a 404 response from Workspace") +} diff --git a/integration/bundle/bind_resource_test.go b/integration/bundle/bind_resource_test.go index ba10190aa..b182150ad 100644 --- a/integration/bundle/bind_resource_test.go +++ b/integration/bundle/bind_resource_test.go @@ -15,8 +15,62 @@ import ( "github.com/google/uuid" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + + "github.com/databricks/databricks-sdk-go/service/catalog" ) +func TestBindSchemaToExistingSchema(t *testing.T) { + ctx, wt := acc.UcWorkspaceTest(t) + + // create a pre-defined schema: + uniqueId := uuid.New().String() + predefinedSchema, err := 
wt.W.Schemas.Create(ctx, catalog.CreateSchema{ + CatalogName: "main", + Name: "test-schema-" + uniqueId, + }) + require.NoError(t, err) + t.Cleanup(func() { + err := wt.W.Schemas.DeleteByFullName(ctx, predefinedSchema.FullName) + require.NoError(t, err) + }) + + // setup the bundle: + bundleRoot := initTestTemplate(t, ctx, "uc_schema_only", map[string]any{ + "unique_id": uniqueId, + }) + ctx = env.Set(ctx, "BUNDLE_ROOT", bundleRoot) + + // run the bind command: + c := testcli.NewRunner(t, ctx, "bundle", "deployment", "bind", "schema1", predefinedSchema.FullName, "--auto-approve") + _, _, err = c.Run() + require.NoError(t, err) + + // deploy the bundle: + deployBundle(t, ctx, bundleRoot) + + // Check that predefinedSchema is updated with config from bundle + w, err := databricks.NewWorkspaceClient() + require.NoError(t, err) + + updatedSchema, err := w.Schemas.GetByFullName(ctx, predefinedSchema.FullName) + require.NoError(t, err) + require.Equal(t, updatedSchema.SchemaId, predefinedSchema.SchemaId) + require.Equal(t, "This schema was created from DABs", updatedSchema.Comment) + + // unbind the schema: + c = testcli.NewRunner(t, ctx, "bundle", "deployment", "unbind", "schema1") + _, _, err = c.Run() + require.NoError(t, err) + + // destroy the bundle: + destroyBundle(t, ctx, bundleRoot) + + // Check that schema is unbound and exists after bundle is destroyed + postDestroySchema, err := w.Schemas.GetByFullName(ctx, predefinedSchema.FullName) + require.NoError(t, err) + require.Equal(t, postDestroySchema.SchemaId, predefinedSchema.SchemaId) +} + func TestBindJobToExistingJob(t *testing.T) { ctx, wt := acc.WorkspaceTest(t) gt := &generateJobTest{T: wt, w: wt.W} diff --git a/integration/bundle/bundles/uc_schema_only/databricks_template_schema.json b/integration/bundle/bundles/uc_schema_only/databricks_template_schema.json new file mode 100644 index 000000000..1d3cdec85 --- /dev/null +++ b/integration/bundle/bundles/uc_schema_only/databricks_template_schema.json @@ -0,0 
+1,8 @@ +{ + "properties": { + "unique_id": { + "type": "string", + "description": "Unique ID for the schema name" + } + } +} diff --git a/integration/bundle/bundles/uc_schema_only/template/databricks.yml.tmpl b/integration/bundle/bundles/uc_schema_only/template/databricks.yml.tmpl new file mode 100644 index 000000000..39c64f12b --- /dev/null +++ b/integration/bundle/bundles/uc_schema_only/template/databricks.yml.tmpl @@ -0,0 +1,13 @@ +bundle: + name: uc-schema-only + +workspace: + root_path: "~/.bundle/{{.unique_id}}" + +resources: + schemas: + schema1: + name: test-schema-{{.unique_id}} + catalog_name: main + comment: This schema was created from DABs + From 549b226cbce70f6310ba69fbbdda33361741ba45 Mon Sep 17 00:00:00 2001 From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com> Date: Tue, 4 Mar 2025 20:17:11 +0530 Subject: [PATCH 7/8] Add the `auth.ProcessEnv` function (#2404) ## Changes This function provides all environment variables necessary to authenticate the downstream applications to the same credentials as the parent process. It's used in https://github.com/databricks/cli/pull/2278 and will also be useful for the `databricks bundle exec` command. ## Tests Unit test. --- internal/testutil/env.go | 31 +++++++++++++-------- libs/auth/env.go | 60 ++++++++++++++++++++++++++++++++++++++-- libs/auth/env_test.go | 2 +- 3 files changed, 79 insertions(+), 14 deletions(-) diff --git a/internal/testutil/env.go b/internal/testutil/env.go index 598229655..1ecbe6485 100644 --- a/internal/testutil/env.go +++ b/internal/testutil/env.go @@ -13,19 +13,11 @@ import ( // The original environment is restored upon test completion. // Note: use of this function is incompatible with parallel execution. func CleanupEnvironment(t TestingT) { - // Restore environment when test finishes. - environ := os.Environ() - t.Cleanup(func() { - // Restore original environment. 
- for _, kv := range environ { - kvs := strings.SplitN(kv, "=", 2) - os.Setenv(kvs[0], kvs[1]) - } - }) - path := os.Getenv("PATH") pwd := os.Getenv("PWD") - os.Clearenv() + + // Clear all environment variables. + NullEnvironment(t) // We use t.Setenv instead of os.Setenv because the former actively // prevents a test being run with t.Parallel. Modifying the environment @@ -38,6 +30,23 @@ func CleanupEnvironment(t TestingT) { } } +// NullEnvironment sets up an empty environment with absolutely no environment variables set. +// The original environment is restored upon test completion. +// Note: use of this function is incompatible with parallel execution +func NullEnvironment(t TestingT) { + // Restore environment when test finishes. + environ := os.Environ() + t.Cleanup(func() { + // Restore original environment. + for _, kv := range environ { + kvs := strings.SplitN(kv, "=", 2) + os.Setenv(kvs[0], kvs[1]) + } + }) + + os.Clearenv() +} + // Changes into specified directory for the duration of the test. // Returns the current working directory. func Chdir(t TestingT, dir string) string { diff --git a/libs/auth/env.go b/libs/auth/env.go index 7413662b9..08282e463 100644 --- a/libs/auth/env.go +++ b/libs/auth/env.go @@ -1,6 +1,13 @@ package auth -import "github.com/databricks/databricks-sdk-go/config" +import ( + "fmt" + "os" + "slices" + "strings" + + "github.com/databricks/databricks-sdk-go/config" +) // Env generates the authentication environment variables we need to set for // downstream applications from the CLI to work correctly. @@ -44,7 +51,7 @@ func GetEnvFor(name string) (string, bool) { // This is useful for spawning subprocesses since you can unset all auth environment // variables to clean up the environment before configuring authentication for the // child process. 
-func EnvVars() []string { +func envVars() []string { out := []string{} for _, attr := range config.ConfigAttributes { @@ -57,3 +64,52 @@ func EnvVars() []string { return out } + +// ProcessEnv generates the environment variables that should be set to authenticate +// downstream processes to use the same auth credentials as in cfg. +func ProcessEnv(cfg *config.Config) []string { + // We want child processes to inherit environment variables like $HOME or $HTTPS_PROXY + // because they influence auth resolution. + base := os.Environ() + + out := []string{} + authEnvVars := envVars() + + // Remove any existing auth environment variables. This is done because + // the CLI offers multiple modalities of configuring authentication like + // `--profile` or `DATABRICKS_CONFIG_PROFILE` or `profile: ` in the + // bundle config file. + // + // Each of these modalities have different priorities and thus we don't want + // any auth configuration to piggyback into the child process environment. + // + // This is a precaution to avoid conflicting auth configurations being passed + // to the child telemetry process. + // + // Normally this should be unnecessary because the SDK should error if multiple + // authentication methods have been configured. But there is no harm in doing this + // as a precaution. + for _, v := range base { + k, _, found := strings.Cut(v, "=") + if !found { + continue + } + if slices.Contains(authEnvVars, k) { + continue + } + out = append(out, v) + } + + // Now add the necessary authentication environment variables. + newEnv := Env(cfg) + for k, v := range newEnv { + out = append(out, fmt.Sprintf("%s=%s", k, v)) + } + + // Sort the environment variables so that the output is deterministic. + // Keeping the output deterministic helps with reproducibility and keeping the + // behavior consistent incase there are any issues. 
+	slices.Sort(out)
+
+	return out
+}
diff --git a/libs/auth/env_test.go b/libs/auth/env_test.go
index 38dc1c6b7..5aa893e11 100644
--- a/libs/auth/env_test.go
+++ b/libs/auth/env_test.go
@@ -122,7 +122,7 @@ func TestAuthEnvVars(t *testing.T) {
 		"ACTIONS_ID_TOKEN_REQUEST_TOKEN",
 	}
 
-	out := EnvVars()
+	out := envVars()
 	for _, v := range contains {
 		assert.Contains(t, out, v)
 	}

From 41961226be98aa66a63a5514cb309d53881f9418 Mon Sep 17 00:00:00 2001
From: Andrew Nester
Date: Tue, 4 Mar 2025 15:03:51 +0000
Subject: [PATCH 8/8] Switch to use GET workspaces-files/{name} instead of
 workspace/export for state files (#2423)

## Changes
Switch to use GET workspaces-files/{name} instead of workspace/export for
state files.

## Why
`/api/2.0/workspace-files/{name}` has a higher limit, which allows exporting
state files larger than 10 MB (10 MB is the current limit for
`workspace/export`). We don't use the same API for reads in other places, nor
do we fully replace the existing Filer, because it doesn't correctly get the
file content for notebooks and returns a "File Not Found" error instead.
## Tests All existing tests pass --- acceptance/bundle/state/databricks.yml | 21 ++++++ acceptance/bundle/state/out.state.txt | 4 + acceptance/bundle/state/output.txt | 12 +++ acceptance/bundle/state/script | 4 + acceptance/bundle/state/test.py | 1 + acceptance/bundle/state/test.toml | 2 + acceptance/server_test.go | 5 ++ bundle/deploy/filer.go | 86 +++++++++++++++++++++- bundle/deploy/state_push.go | 13 ---- bundle/deploy/terraform/state_push.go | 11 --- bundle/deploy/terraform/state_push_test.go | 27 ------- libs/testserver/fake_workspace.go | 7 ++ 12 files changed, 139 insertions(+), 54 deletions(-) create mode 100644 acceptance/bundle/state/databricks.yml create mode 100644 acceptance/bundle/state/out.state.txt create mode 100644 acceptance/bundle/state/output.txt create mode 100644 acceptance/bundle/state/script create mode 100644 acceptance/bundle/state/test.py create mode 100644 acceptance/bundle/state/test.toml diff --git a/acceptance/bundle/state/databricks.yml b/acceptance/bundle/state/databricks.yml new file mode 100644 index 000000000..4775ba33d --- /dev/null +++ b/acceptance/bundle/state/databricks.yml @@ -0,0 +1,21 @@ +bundle: + name: state + +resources: + jobs: + test: + name: "test" + tasks: + - task_key: "test-task" + spark_python_task: + python_file: ./test.py + new_cluster: + spark_version: 15.4.x-scala2.12 + node_type_id: i3.xlarge + data_security_mode: SINGLE_USER + num_workers: 0 + spark_conf: + spark.master: "local[*, 4]" + spark.databricks.cluster.profile: singleNode + custom_tags: + ResourceClass: SingleNode diff --git a/acceptance/bundle/state/out.state.txt b/acceptance/bundle/state/out.state.txt new file mode 100644 index 000000000..96966812b --- /dev/null +++ b/acceptance/bundle/state/out.state.txt @@ -0,0 +1,4 @@ +{ + "method": "GET", + "path": "/api/2.0/workspace-files/Workspace/Users/[USERNAME]/.bundle/state/default/state/terraform.tfstate" +} diff --git a/acceptance/bundle/state/output.txt b/acceptance/bundle/state/output.txt new 
file mode 100644 index 000000000..ac13a7ba4 --- /dev/null +++ b/acceptance/bundle/state/output.txt @@ -0,0 +1,12 @@ + +>>> [CLI] bundle deploy +Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/state/default/files... +Deploying resources... +Updating deployment state... +Deployment complete! + +>>> [CLI] bundle deploy +Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/state/default/files... +Deploying resources... +Updating deployment state... +Deployment complete! diff --git a/acceptance/bundle/state/script b/acceptance/bundle/state/script new file mode 100644 index 000000000..9977f7d52 --- /dev/null +++ b/acceptance/bundle/state/script @@ -0,0 +1,4 @@ +trace $CLI bundle deploy +trace $CLI bundle deploy # We do 2 deploys because only 2nd deploy will pull state from remote after 1st created it +jq 'select(.path == "/api/2.0/workspace-files/Workspace/Users/[USERNAME]/.bundle/state/default/state/terraform.tfstate")' out.requests.txt > out.state.txt +rm out.requests.txt diff --git a/acceptance/bundle/state/test.py b/acceptance/bundle/state/test.py new file mode 100644 index 000000000..f1a18139c --- /dev/null +++ b/acceptance/bundle/state/test.py @@ -0,0 +1 @@ +print("Hello world!") diff --git a/acceptance/bundle/state/test.toml b/acceptance/bundle/state/test.toml new file mode 100644 index 000000000..7061886de --- /dev/null +++ b/acceptance/bundle/state/test.toml @@ -0,0 +1,2 @@ +Cloud = false +RecordRequests = true diff --git a/acceptance/server_test.go b/acceptance/server_test.go index 402e3ca5f..7ef705931 100644 --- a/acceptance/server_test.go +++ b/acceptance/server_test.go @@ -111,6 +111,11 @@ func AddHandlers(server *testserver.Server) { return "" }) + server.Handle("GET", "/api/2.0/workspace-files/{path:.*}", func(req testserver.Request) any { + path := req.Vars["path"] + return req.Workspace.WorkspaceFilesExportFile(path) + }) + server.Handle("GET", "/api/2.1/unity-catalog/current-metastore-assignment", func(req testserver.Request) any 
{ return testMetastore }) diff --git a/bundle/deploy/filer.go b/bundle/deploy/filer.go index c0fd839ef..b65f08a67 100644 --- a/bundle/deploy/filer.go +++ b/bundle/deploy/filer.go @@ -1,14 +1,94 @@ package deploy import ( + "bytes" + "context" + "fmt" + "io" + "io/fs" + "net/http" + "net/url" + "strings" + "github.com/databricks/cli/bundle" "github.com/databricks/cli/libs/filer" + "github.com/databricks/databricks-sdk-go/client" ) // FilerFactory is a function that returns a filer.Filer. type FilerFactory func(b *bundle.Bundle) (filer.Filer, error) -// StateFiler returns a filer.Filer that can be used to read/write state files. -func StateFiler(b *bundle.Bundle) (filer.Filer, error) { - return filer.NewWorkspaceFilesClient(b.WorkspaceClient(), b.Config.Workspace.StatePath) +type stateFiler struct { + filer filer.Filer + + apiClient *client.DatabricksClient + root filer.WorkspaceRootPath +} + +func (s stateFiler) Delete(ctx context.Context, path string, mode ...filer.DeleteMode) error { + return s.filer.Delete(ctx, path, mode...) +} + +// Mkdir implements filer.Filer. 
+func (s stateFiler) Mkdir(ctx context.Context, path string) error {
+	return s.filer.Mkdir(ctx, path)
+}
+
+func (s stateFiler) Read(ctx context.Context, path string) (io.ReadCloser, error) {
+	absPath, err := s.root.Join(path)
+	if err != nil {
+		return nil, err
+	}
+
+	stat, err := s.Stat(ctx, path)
+	if err != nil {
+		return nil, err
+	}
+	if stat.IsDir() {
+		return nil, fmt.Errorf("not a file: %s", absPath)
+	}
+
+	var buf bytes.Buffer
+	urlPath := "/api/2.0/workspace-files/" + url.PathEscape(strings.TrimLeft(absPath, "/"))
+	err = s.apiClient.Do(ctx, http.MethodGet, urlPath, nil, nil, nil, &buf)
+	if err != nil {
+		return nil, err
+	}
+
+	return io.NopCloser(&buf), nil
+}
+
+func (s stateFiler) ReadDir(ctx context.Context, path string) ([]fs.DirEntry, error) {
+	return s.filer.ReadDir(ctx, path)
+}
+
+func (s stateFiler) Stat(ctx context.Context, name string) (fs.FileInfo, error) {
+	return s.filer.Stat(ctx, name)
+}
+
+func (s stateFiler) Write(ctx context.Context, path string, reader io.Reader, mode ...filer.WriteMode) error {
+	return s.filer.Write(ctx, path, reader, mode...)
+}
+
+// StateFiler returns a filer.Filer that can be used to read/write state files.
+// We use a custom workspace filer, which uses the workspace-files API to read state files.
+// This API has a limit higher than 10 MB and thus allows exporting large state files.
+// We don't use the same API for reads elsewhere because it doesn't correctly get the file content for notebooks
+// and returns a "File Not Found" error instead.
+func StateFiler(b *bundle.Bundle) (filer.Filer, error) { + f, err := filer.NewWorkspaceFilesClient(b.WorkspaceClient(), b.Config.Workspace.StatePath) + if err != nil { + return nil, err + } + + apiClient, err := client.New(b.WorkspaceClient().Config) + if err != nil { + return nil, fmt.Errorf("failed to create API client: %w", err) + } + + return stateFiler{ + filer: f, + root: filer.NewWorkspaceRootPath(b.Config.Workspace.StatePath), + apiClient: apiClient, + }, nil } diff --git a/bundle/deploy/state_push.go b/bundle/deploy/state_push.go index 6912414c1..176a907c8 100644 --- a/bundle/deploy/state_push.go +++ b/bundle/deploy/state_push.go @@ -10,8 +10,6 @@ import ( "github.com/databricks/cli/libs/log" ) -const MaxStateFileSize = 10 * 1024 * 1024 // 10MB - type statePush struct { filerFactory FilerFactory } @@ -37,17 +35,6 @@ func (s *statePush) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostic } defer local.Close() - if !b.Config.Bundle.Force { - state, err := local.Stat() - if err != nil { - return diag.FromErr(err) - } - - if state.Size() > MaxStateFileSize { - return diag.Errorf("Deployment state file size exceeds the maximum allowed size of %d bytes. 
Please reduce the number of resources in your bundle, split your bundle into multiple or re-run the command with --force flag.", MaxStateFileSize) - } - } - log.Infof(ctx, "Writing local deployment state file to remote state directory") err = f.Write(ctx, DeploymentStateFileName, local, filer.CreateParentDirectories, filer.OverwriteIfExists) if err != nil { diff --git a/bundle/deploy/terraform/state_push.go b/bundle/deploy/terraform/state_push.go index 84d8e7670..6cdde1371 100644 --- a/bundle/deploy/terraform/state_push.go +++ b/bundle/deploy/terraform/state_push.go @@ -47,17 +47,6 @@ func (l *statePush) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostic } defer local.Close() - if !b.Config.Bundle.Force { - state, err := local.Stat() - if err != nil { - return diag.FromErr(err) - } - - if state.Size() > deploy.MaxStateFileSize { - return diag.Errorf("Terraform state file size exceeds the maximum allowed size of %d bytes. Please reduce the number of resources in your bundle, split your bundle into multiple or re-run the command with --force flag", deploy.MaxStateFileSize) - } - } - // Upload state file from local cache directory to filer. 
cmdio.LogString(ctx, "Updating deployment state...") log.Infof(ctx, "Writing local state file to remote state directory") diff --git a/bundle/deploy/terraform/state_push_test.go b/bundle/deploy/terraform/state_push_test.go index 54e7f621c..e022dee1b 100644 --- a/bundle/deploy/terraform/state_push_test.go +++ b/bundle/deploy/terraform/state_push_test.go @@ -3,7 +3,6 @@ package terraform import ( "context" "encoding/json" - "fmt" "io" "testing" @@ -60,29 +59,3 @@ func TestStatePush(t *testing.T) { diags := bundle.Apply(ctx, b, m) assert.NoError(t, diags.Error()) } - -func TestStatePushLargeState(t *testing.T) { - mock := mockfiler.NewMockFiler(t) - m := &statePush{ - identityFiler(mock), - } - - ctx := context.Background() - b := statePushTestBundle(t) - - largeState := map[string]any{} - for i := range 1000000 { - largeState[fmt.Sprintf("field_%d", i)] = i - } - - // Write a stale local state file. - writeLocalState(t, ctx, b, largeState) - diags := bundle.Apply(ctx, b, m) - assert.ErrorContains(t, diags.Error(), "Terraform state file size exceeds the maximum allowed size of 10485760 bytes. Please reduce the number of resources in your bundle, split your bundle into multiple or re-run the command with --force flag") - - // Force the write. - b = statePushTestBundle(t) - b.Config.Bundle.Force = true - diags = bundle.Apply(ctx, b, m) - assert.NoError(t, diags.Error()) -} diff --git a/libs/testserver/fake_workspace.go b/libs/testserver/fake_workspace.go index 4e943f828..80e88941d 100644 --- a/libs/testserver/fake_workspace.go +++ b/libs/testserver/fake_workspace.go @@ -83,6 +83,13 @@ func (s *FakeWorkspace) WorkspaceFilesImportFile(path string, body []byte) { s.files[path] = body } +func (s *FakeWorkspace) WorkspaceFilesExportFile(path string) []byte { + if !strings.HasPrefix(path, "/") { + path = "/" + path + } + return s.files[path] +} + func (s *FakeWorkspace) JobsCreate(request jobs.CreateJob) Response { jobId := s.nextJobId s.nextJobId++