diff --git a/.codegen/_openapi_sha b/.codegen/_openapi_sha index 303c78553..2d9cb6d86 100644 --- a/.codegen/_openapi_sha +++ b/.codegen/_openapi_sha @@ -1 +1 @@ -0c86ea6dbd9a730c24ff0d4e509603e476955ac5 \ No newline at end of file +cf9c61453990df0f9453670f2fe68e1b128647a2 \ No newline at end of file diff --git a/.gitattributes b/.gitattributes index 2470eb33d..ae10198bb 100755 --- a/.gitattributes +++ b/.gitattributes @@ -54,6 +54,7 @@ cmd/workspace/dashboards/dashboards.go linguist-generated=true cmd/workspace/data-sources/data-sources.go linguist-generated=true cmd/workspace/default-namespace/default-namespace.go linguist-generated=true cmd/workspace/disable-legacy-access/disable-legacy-access.go linguist-generated=true +cmd/workspace/disable-legacy-dbfs/disable-legacy-dbfs.go linguist-generated=true cmd/workspace/enhanced-security-monitoring/enhanced-security-monitoring.go linguist-generated=true cmd/workspace/experiments/experiments.go linguist-generated=true cmd/workspace/external-locations/external-locations.go linguist-generated=true diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml new file mode 100644 index 000000000..a40cdf32c --- /dev/null +++ b/.github/workflows/integration-tests.yml @@ -0,0 +1,60 @@ +name: integration + +on: + + pull_request: + types: [opened, synchronize] + + merge_group: + + +jobs: + trigger-tests: + if: github.event_name == 'pull_request' + runs-on: ubuntu-latest + environment: "test-trigger-is" + + steps: + - uses: actions/checkout@v4 + + - name: Generate GitHub App Token + id: generate-token + uses: actions/create-github-app-token@v1 + with: + app-id: ${{ secrets.DECO_WORKFLOW_TRIGGER_APP_ID }} + private-key: ${{ secrets.DECO_WORKFLOW_TRIGGER_PRIVATE_KEY }} + owner: ${{ secrets.ORG_NAME }} + repositories: ${{secrets.REPO_NAME}} + + - name: Trigger Workflow in Another Repo + env: + GH_TOKEN: ${{ steps.generate-token.outputs.token }} + run: | + gh workflow run cli-isolated-pr.yml -R ${{ secrets.ORG_NAME }}/${{secrets.REPO_NAME}} \ + --ref main \ + -f pull_request_number=${{ github.event.pull_request.number }} \ + -f commit_sha=${{ github.event.pull_request.head.sha }} + + + + # Statuses and checks apply to specific commits (by hash). + # Enforcement of required checks is done both at the PR level and the merge queue level. + # In case of multiple commits in a single PR, the hash of the squashed commit + # will not match the one for the latest (approved) commit in the PR. + # We auto approve the check for the merge queue for two reasons: + # * Queue times out due to duration of tests. + # * Avoid running integration tests twice, since it was already run at the tip of the branch before squashing. + auto-approve: + if: github.event_name == 'merge_group' + runs-on: ubuntu-latest + steps: + - name: Mark Check + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + shell: bash + run: | + gh api -X POST -H "Accept: application/vnd.github+json" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + /repos/${{ github.repository }}/statuses/${{ github.sha }} \ + -f 'state=success' \ + -f 'context=Integration Tests Check' diff --git a/CHANGELOG.md b/CHANGELOG.md index f31bb10b2..863474934 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,43 @@ # Version changelog +## [Release] Release v0.231.0 + +CLI: + * Added JSON input validation for CLI commands ([#1771](https://github.com/databricks/cli/pull/1771)). + * Support Git worktrees for `sync` ([#1831](https://github.com/databricks/cli/pull/1831)). 
+ +Bundles: + * Add `bundle summary` to display URLs for deployed resources ([#1731](https://github.com/databricks/cli/pull/1731)). + * Added a warning when incorrect permissions used for `/Workspace/Shared` bundle root ([#1821](https://github.com/databricks/cli/pull/1821)). + * Show actionable errors for collaborative deployment scenarios ([#1386](https://github.com/databricks/cli/pull/1386)). + * Fix path to repository-wide exclude file ([#1837](https://github.com/databricks/cli/pull/1837)). + * Fixed typo in converting cluster permissions ([#1826](https://github.com/databricks/cli/pull/1826)). + * Ignore metastore permission error during template generation ([#1819](https://github.com/databricks/cli/pull/1819)). + * Handle normalization of `dyn.KindTime` into an any type ([#1836](https://github.com/databricks/cli/pull/1836)). + * Added support for pip options in environment dependencies ([#1842](https://github.com/databricks/cli/pull/1842)). + * Fix race condition when restarting continuous jobs ([#1849](https://github.com/databricks/cli/pull/1849)). + * Fix pipeline in default-python template not working for certain workspaces ([#1854](https://github.com/databricks/cli/pull/1854)). + * Add "output" flag to the bundle sync command ([#1853](https://github.com/databricks/cli/pull/1853)). + +Internal: + * Move utility functions dealing with IAM to libs/iamutil ([#1820](https://github.com/databricks/cli/pull/1820)). + * Remove unused `IS_OWNER` constant ([#1823](https://github.com/databricks/cli/pull/1823)). + * Assert SDK version is consistent in the CLI generation process ([#1814](https://github.com/databricks/cli/pull/1814)). + * Fixed unmarshalling json input into `interface{}` type ([#1832](https://github.com/databricks/cli/pull/1832)). + * Fix `TestAccFsMkdirWhenFileExistsAtPath` in isolated Azure environments ([#1833](https://github.com/databricks/cli/pull/1833)). + * Add behavioral tests for examples from the YAML spec ([#1835](https://github.com/databricks/cli/pull/1835)). + * Remove Terraform conversion function that's no longer used ([#1840](https://github.com/databricks/cli/pull/1840)). + * Encode assumptions about the dashboards API in a test ([#1839](https://github.com/databricks/cli/pull/1839)). + * Add script to make testing of code on branches easier ([#1844](https://github.com/databricks/cli/pull/1844)). + +API Changes: + * Added `databricks disable-legacy-dbfs` command group. + +OpenAPI commit cf9c61453990df0f9453670f2fe68e1b128647a2 (2024-10-14) +Dependency updates: + * Upgrade TF provider to 1.54.0 ([#1852](https://github.com/databricks/cli/pull/1852)). + * Bump github.com/databricks/databricks-sdk-go from 0.48.0 to 0.49.0 ([#1843](https://github.com/databricks/cli/pull/1843)). 
+ ## [Release] Release v0.230.0 Notable changes for Databricks Asset Bundles: diff --git a/bundle/config/mutator/translate_paths_test.go b/bundle/config/mutator/translate_paths_test.go index c03cee73e..9d655b27b 100644 --- a/bundle/config/mutator/translate_paths_test.go +++ b/bundle/config/mutator/translate_paths_test.go @@ -699,6 +699,9 @@ func TestTranslatePathJobEnvironments(t *testing.T) { "../dist/env2.whl", "simplejson", "/Workspace/Users/foo@bar.com/test.whl", + "--extra-index-url https://name:token@gitlab.com/api/v4/projects/9876/packages/pypi/simple foobar", + "foobar --extra-index-url https://name:token@gitlab.com/api/v4/projects/9876/packages/pypi/simple", + "https://foo@bar.com/packages/pypi/simple", }, }, }, @@ -719,6 +722,9 @@ func TestTranslatePathJobEnvironments(t *testing.T) { assert.Equal(t, strings.Join([]string{".", "dist", "env2.whl"}, string(os.PathSeparator)), b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[1]) assert.Equal(t, "simplejson", b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[2]) assert.Equal(t, "/Workspace/Users/foo@bar.com/test.whl", b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[3]) + assert.Equal(t, "--extra-index-url https://name:token@gitlab.com/api/v4/projects/9876/packages/pypi/simple foobar", b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[4]) + assert.Equal(t, "foobar --extra-index-url https://name:token@gitlab.com/api/v4/projects/9876/packages/pypi/simple", b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[5]) + assert.Equal(t, "https://foo@bar.com/packages/pypi/simple", b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[6]) } func TestTranslatePathWithComplexVariables(t *testing.T) { diff --git a/bundle/config/resources/permission.go b/bundle/config/resources/permission.go index fa2d8796c..62e18a09e 100644 --- a/bundle/config/resources/permission.go +++ b/bundle/config/resources/permission.go @@ -1,5 +1,7 @@ package resources +import "fmt" + // Permission holds the permission level setting for a single principal. // Multiple of these can be defined on any resource. type Permission struct { @@ -9,3 +11,19 @@ type Permission struct { ServicePrincipalName string `json:"service_principal_name,omitempty"` GroupName string `json:"group_name,omitempty"` } + +func (p Permission) String() string { + if p.UserName != "" { + return fmt.Sprintf("level: %s, user_name: %s", p.Level, p.UserName) + } + + if p.ServicePrincipalName != "" { + return fmt.Sprintf("level: %s, service_principal_name: %s", p.Level, p.ServicePrincipalName) + } + + if p.GroupName != "" { + return fmt.Sprintf("level: %s, group_name: %s", p.Level, p.GroupName) + } + + return fmt.Sprintf("level: %s", p.Level) +} diff --git a/bundle/config/validate/folder_permissions.go b/bundle/config/validate/folder_permissions.go new file mode 100644 index 000000000..a376bd776 --- /dev/null +++ b/bundle/config/validate/folder_permissions.go @@ -0,0 +1,126 @@ +package validate + +import ( + "context" + "fmt" + "path" + "strings" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/libraries" + "github.com/databricks/cli/bundle/permissions" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/databricks-sdk-go/apierr" + "github.com/databricks/databricks-sdk-go/service/workspace" + "golang.org/x/sync/errgroup" +) + +type folderPermissions struct { +} + +// Apply implements bundle.ReadOnlyMutator. 
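+// It collects the workspace folders the bundle writes to (the root path plus any
+// artifact, file, state, and resource paths outside it) and checks each of them
+// concurrently against the bundle's top-level permissions.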
+func (f *folderPermissions) Apply(ctx context.Context, b bundle.ReadOnlyBundle) diag.Diagnostics {
+	if len(b.Config().Permissions) == 0 {
+		return nil
+	}
+
+	rootPath := b.Config().Workspace.RootPath
+	paths := []string{}
+	if !libraries.IsVolumesPath(rootPath) && !libraries.IsWorkspaceSharedPath(rootPath) {
+		paths = append(paths, rootPath)
+	}
+
+	if !strings.HasSuffix(rootPath, "/") {
+		rootPath += "/"
+	}
+
+	for _, p := range []string{
+		b.Config().Workspace.ArtifactPath,
+		b.Config().Workspace.FilePath,
+		b.Config().Workspace.StatePath,
+		b.Config().Workspace.ResourcePath,
+	} {
+		if libraries.IsWorkspaceSharedPath(p) || libraries.IsVolumesPath(p) {
+			continue
+		}
+
+		if strings.HasPrefix(p, rootPath) {
+			continue
+		}
+
+		paths = append(paths, p)
+	}
+
+	var diags diag.Diagnostics
+	g, ctx := errgroup.WithContext(ctx)
+	results := make([]diag.Diagnostics, len(paths))
+	for i, p := range paths {
+		g.Go(func() error {
+			results[i] = checkFolderPermission(ctx, b, p)
+			return nil
+		})
+	}
+
+	if err := g.Wait(); err != nil {
+		return diag.FromErr(err)
+	}
+
+	for _, r := range results {
+		diags = diags.Extend(r)
+	}
+
+	return diags
+}
+
+func checkFolderPermission(ctx context.Context, b bundle.ReadOnlyBundle, folderPath string) diag.Diagnostics {
+	w := b.WorkspaceClient().Workspace
+	obj, err := getClosestExistingObject(ctx, w, folderPath)
+	if err != nil {
+		return diag.FromErr(err)
+	}
+
+	objPermissions, err := w.GetPermissions(ctx, workspace.GetWorkspaceObjectPermissionsRequest{
+		WorkspaceObjectId:   fmt.Sprint(obj.ObjectId),
+		WorkspaceObjectType: "directories",
+	})
+	if err != nil {
+		return diag.FromErr(err)
+	}
+
+	p := permissions.ObjectAclToResourcePermissions(folderPath, objPermissions.AccessControlList)
+	return p.Compare(b.Config().Permissions)
+}
+
+func getClosestExistingObject(ctx context.Context, w workspace.WorkspaceInterface, folderPath string) (*workspace.ObjectInfo, error) {
+	for {
+		obj, err := w.GetStatusByPath(ctx, folderPath)
+		if err == nil {
+			return obj, nil
+		}
+
+		if !apierr.IsMissing(err) {
+			return nil, err
+		}
+
+		parent := path.Dir(folderPath)
+		// If the parent is the same as the current folder, then we have reached the root
+		if folderPath == parent {
+			break
+		}
+
+		folderPath = parent
+	}
+
+	return nil, fmt.Errorf("folder %s and its parent folders do not exist", folderPath)
+}
+
+// Name implements bundle.ReadOnlyMutator.
+func (f *folderPermissions) Name() string {
+	return "validate:folder_permissions"
+}
+
+// ValidateFolderPermissions validates that the permissions for folders in the workspace file system match
+// the permissions in the top-level permissions section of the bundle.
+func ValidateFolderPermissions() bundle.ReadOnlyMutator { + return &folderPermissions{} +} diff --git a/bundle/config/validate/folder_permissions_test.go b/bundle/config/validate/folder_permissions_test.go new file mode 100644 index 000000000..8e68c9fbf --- /dev/null +++ b/bundle/config/validate/folder_permissions_test.go @@ -0,0 +1,208 @@ +package validate + +import ( + "context" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/bundle/permissions" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/databricks-sdk-go/apierr" + "github.com/databricks/databricks-sdk-go/experimental/mocks" + "github.com/databricks/databricks-sdk-go/service/workspace" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" +) + +func TestFolderPermissionsInheritedWhenRootPathDoesNotExist(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Workspace: config.Workspace{ + RootPath: "/Workspace/Users/foo@bar.com", + ArtifactPath: "/Workspace/Users/otherfoo@bar.com/artifacts", + FilePath: "/Workspace/Users/foo@bar.com/files", + StatePath: "/Workspace/Users/foo@bar.com/state", + ResourcePath: "/Workspace/Users/foo@bar.com/resources", + }, + Permissions: []resources.Permission{ + {Level: permissions.CAN_MANAGE, UserName: "foo@bar.com"}, + }, + }, + } + m := mocks.NewMockWorkspaceClient(t) + api := m.GetMockWorkspaceAPI() + api.EXPECT().GetStatusByPath(mock.Anything, "/Workspace/Users/otherfoo@bar.com/artifacts").Return(nil, &apierr.APIError{ + StatusCode: 404, + ErrorCode: "RESOURCE_DOES_NOT_EXIST", + }) + api.EXPECT().GetStatusByPath(mock.Anything, "/Workspace/Users/otherfoo@bar.com").Return(nil, &apierr.APIError{ + StatusCode: 404, + ErrorCode: "RESOURCE_DOES_NOT_EXIST", + }) + api.EXPECT().GetStatusByPath(mock.Anything, "/Workspace/Users/foo@bar.com").Return(nil, &apierr.APIError{ + StatusCode: 404, + ErrorCode: "RESOURCE_DOES_NOT_EXIST", + }) + api.EXPECT().GetStatusByPath(mock.Anything, "/Workspace/Users").Return(nil, &apierr.APIError{ + StatusCode: 404, + ErrorCode: "RESOURCE_DOES_NOT_EXIST", + }) + api.EXPECT().GetStatusByPath(mock.Anything, "/Workspace").Return(&workspace.ObjectInfo{ + ObjectId: 1234, + }, nil) + + api.EXPECT().GetPermissions(mock.Anything, workspace.GetWorkspaceObjectPermissionsRequest{ + WorkspaceObjectId: "1234", + WorkspaceObjectType: "directories", + }).Return(&workspace.WorkspaceObjectPermissions{ + ObjectId: "1234", + AccessControlList: []workspace.WorkspaceObjectAccessControlResponse{ + { + UserName: "foo@bar.com", + AllPermissions: []workspace.WorkspaceObjectPermission{ + {PermissionLevel: "CAN_MANAGE"}, + }, + }, + }, + }, nil) + + b.SetWorkpaceClient(m.WorkspaceClient) + rb := bundle.ReadOnly(b) + + diags := bundle.ApplyReadOnly(context.Background(), rb, ValidateFolderPermissions()) + require.Empty(t, diags) +} + +func TestValidateFolderPermissionsFailsOnMissingBundlePermission(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Workspace: config.Workspace{ + RootPath: "/Workspace/Users/foo@bar.com", + ArtifactPath: "/Workspace/Users/foo@bar.com/artifacts", + FilePath: "/Workspace/Users/foo@bar.com/files", + StatePath: "/Workspace/Users/foo@bar.com/state", + ResourcePath: "/Workspace/Users/foo@bar.com/resources", + }, + Permissions: []resources.Permission{ + {Level: permissions.CAN_MANAGE, UserName: "foo@bar.com"}, + }, + }, + } + m := mocks.NewMockWorkspaceClient(t) + api := 
m.GetMockWorkspaceAPI() + api.EXPECT().GetStatusByPath(mock.Anything, "/Workspace/Users/foo@bar.com").Return(&workspace.ObjectInfo{ + ObjectId: 1234, + }, nil) + + api.EXPECT().GetPermissions(mock.Anything, workspace.GetWorkspaceObjectPermissionsRequest{ + WorkspaceObjectId: "1234", + WorkspaceObjectType: "directories", + }).Return(&workspace.WorkspaceObjectPermissions{ + ObjectId: "1234", + AccessControlList: []workspace.WorkspaceObjectAccessControlResponse{ + { + UserName: "foo@bar.com", + AllPermissions: []workspace.WorkspaceObjectPermission{ + {PermissionLevel: "CAN_MANAGE"}, + }, + }, + { + UserName: "foo2@bar.com", + AllPermissions: []workspace.WorkspaceObjectPermission{ + {PermissionLevel: "CAN_MANAGE"}, + }, + }, + }, + }, nil) + + b.SetWorkpaceClient(m.WorkspaceClient) + rb := bundle.ReadOnly(b) + + diags := bundle.ApplyReadOnly(context.Background(), rb, ValidateFolderPermissions()) + require.Len(t, diags, 1) + require.Equal(t, "untracked permissions apply to target workspace path", diags[0].Summary) + require.Equal(t, diag.Warning, diags[0].Severity) + require.Equal(t, "The following permissions apply to the workspace folder at \"/Workspace/Users/foo@bar.com\" but are not configured in the bundle:\n- level: CAN_MANAGE, user_name: foo2@bar.com\n", diags[0].Detail) +} + +func TestValidateFolderPermissionsFailsOnPermissionMismatch(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Workspace: config.Workspace{ + RootPath: "/Workspace/Users/foo@bar.com", + ArtifactPath: "/Workspace/Users/foo@bar.com/artifacts", + FilePath: "/Workspace/Users/foo@bar.com/files", + StatePath: "/Workspace/Users/foo@bar.com/state", + ResourcePath: "/Workspace/Users/foo@bar.com/resources", + }, + Permissions: []resources.Permission{ + {Level: permissions.CAN_MANAGE, UserName: "foo@bar.com"}, + }, + }, + } + m := mocks.NewMockWorkspaceClient(t) + api := m.GetMockWorkspaceAPI() + api.EXPECT().GetStatusByPath(mock.Anything, "/Workspace/Users/foo@bar.com").Return(&workspace.ObjectInfo{ + ObjectId: 1234, + }, nil) + + api.EXPECT().GetPermissions(mock.Anything, workspace.GetWorkspaceObjectPermissionsRequest{ + WorkspaceObjectId: "1234", + WorkspaceObjectType: "directories", + }).Return(&workspace.WorkspaceObjectPermissions{ + ObjectId: "1234", + AccessControlList: []workspace.WorkspaceObjectAccessControlResponse{ + { + UserName: "foo2@bar.com", + AllPermissions: []workspace.WorkspaceObjectPermission{ + {PermissionLevel: "CAN_MANAGE"}, + }, + }, + }, + }, nil) + + b.SetWorkpaceClient(m.WorkspaceClient) + rb := bundle.ReadOnly(b) + + diags := bundle.ApplyReadOnly(context.Background(), rb, ValidateFolderPermissions()) + require.Len(t, diags, 1) + require.Equal(t, "untracked permissions apply to target workspace path", diags[0].Summary) + require.Equal(t, diag.Warning, diags[0].Severity) +} + +func TestValidateFolderPermissionsFailsOnNoRootFolder(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Workspace: config.Workspace{ + RootPath: "/NotExisting", + ArtifactPath: "/NotExisting/artifacts", + FilePath: "/NotExisting/files", + StatePath: "/NotExisting/state", + ResourcePath: "/NotExisting/resources", + }, + Permissions: []resources.Permission{ + {Level: permissions.CAN_MANAGE, UserName: "foo@bar.com"}, + }, + }, + } + m := mocks.NewMockWorkspaceClient(t) + api := m.GetMockWorkspaceAPI() + api.EXPECT().GetStatusByPath(mock.Anything, "/NotExisting").Return(nil, &apierr.APIError{ + StatusCode: 404, + ErrorCode: "RESOURCE_DOES_NOT_EXIST", + }) + api.EXPECT().GetStatusByPath(mock.Anything, 
"/").Return(nil, &apierr.APIError{ + StatusCode: 404, + ErrorCode: "RESOURCE_DOES_NOT_EXIST", + }) + + b.SetWorkpaceClient(m.WorkspaceClient) + rb := bundle.ReadOnly(b) + + diags := bundle.ApplyReadOnly(context.Background(), rb, ValidateFolderPermissions()) + require.Len(t, diags, 1) + require.Equal(t, "folder / and its parent folders do not exist", diags[0].Summary) + require.Equal(t, diag.Error, diags[0].Severity) +} diff --git a/bundle/config/validate/validate.go b/bundle/config/validate/validate.go index 79f42bd23..440477e65 100644 --- a/bundle/config/validate/validate.go +++ b/bundle/config/validate/validate.go @@ -35,6 +35,7 @@ func (v *validate) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics FilesToSync(), ValidateSyncPatterns(), JobTaskClusterSpec(), + ValidateFolderPermissions(), )) } diff --git a/bundle/internal/tf/codegen/schema/version.go b/bundle/internal/tf/codegen/schema/version.go index 49e48a6e3..0c4244089 100644 --- a/bundle/internal/tf/codegen/schema/version.go +++ b/bundle/internal/tf/codegen/schema/version.go @@ -1,3 +1,3 @@ package schema -const ProviderVersion = "1.53.0" +const ProviderVersion = "1.54.0" diff --git a/bundle/internal/tf/schema/data_source_notification_destinations.go b/bundle/internal/tf/schema/data_source_notification_destinations.go new file mode 100644 index 000000000..c95ad6db9 --- /dev/null +++ b/bundle/internal/tf/schema/data_source_notification_destinations.go @@ -0,0 +1,15 @@ +// Generated from Databricks Terraform provider schema. DO NOT EDIT. + +package schema + +type DataSourceNotificationDestinationsNotificationDestinations struct { + DestinationType string `json:"destination_type,omitempty"` + DisplayName string `json:"display_name,omitempty"` + Id string `json:"id,omitempty"` +} + +type DataSourceNotificationDestinations struct { + DisplayNameContains string `json:"display_name_contains,omitempty"` + Type string `json:"type,omitempty"` + NotificationDestinations []DataSourceNotificationDestinationsNotificationDestinations `json:"notification_destinations,omitempty"` +} diff --git a/bundle/internal/tf/schema/data_source_registered_model.go b/bundle/internal/tf/schema/data_source_registered_model.go new file mode 100644 index 000000000..e19e0849a --- /dev/null +++ b/bundle/internal/tf/schema/data_source_registered_model.go @@ -0,0 +1,32 @@ +// Generated from Databricks Terraform provider schema. DO NOT EDIT. 
+ +package schema + +type DataSourceRegisteredModelModelInfoAliases struct { + AliasName string `json:"alias_name,omitempty"` + VersionNum int `json:"version_num,omitempty"` +} + +type DataSourceRegisteredModelModelInfo struct { + BrowseOnly bool `json:"browse_only,omitempty"` + CatalogName string `json:"catalog_name,omitempty"` + Comment string `json:"comment,omitempty"` + CreatedAt int `json:"created_at,omitempty"` + CreatedBy string `json:"created_by,omitempty"` + FullName string `json:"full_name,omitempty"` + MetastoreId string `json:"metastore_id,omitempty"` + Name string `json:"name,omitempty"` + Owner string `json:"owner,omitempty"` + SchemaName string `json:"schema_name,omitempty"` + StorageLocation string `json:"storage_location,omitempty"` + UpdatedAt int `json:"updated_at,omitempty"` + UpdatedBy string `json:"updated_by,omitempty"` + Aliases []DataSourceRegisteredModelModelInfoAliases `json:"aliases,omitempty"` +} + +type DataSourceRegisteredModel struct { + FullName string `json:"full_name"` + IncludeAliases bool `json:"include_aliases,omitempty"` + IncludeBrowse bool `json:"include_browse,omitempty"` + ModelInfo []DataSourceRegisteredModelModelInfo `json:"model_info,omitempty"` +} diff --git a/bundle/internal/tf/schema/data_sources.go b/bundle/internal/tf/schema/data_sources.go index 10829b994..050e0bc1d 100644 --- a/bundle/internal/tf/schema/data_sources.go +++ b/bundle/internal/tf/schema/data_sources.go @@ -36,7 +36,9 @@ type DataSources struct { NodeType map[string]any `json:"databricks_node_type,omitempty"` Notebook map[string]any `json:"databricks_notebook,omitempty"` NotebookPaths map[string]any `json:"databricks_notebook_paths,omitempty"` + NotificationDestinations map[string]any `json:"databricks_notification_destinations,omitempty"` Pipelines map[string]any `json:"databricks_pipelines,omitempty"` + RegisteredModel map[string]any `json:"databricks_registered_model,omitempty"` Schema map[string]any `json:"databricks_schema,omitempty"` Schemas map[string]any `json:"databricks_schemas,omitempty"` ServicePrincipal map[string]any `json:"databricks_service_principal,omitempty"` @@ -92,7 +94,9 @@ func NewDataSources() *DataSources { NodeType: make(map[string]any), Notebook: make(map[string]any), NotebookPaths: make(map[string]any), + NotificationDestinations: make(map[string]any), Pipelines: make(map[string]any), + RegisteredModel: make(map[string]any), Schema: make(map[string]any), Schemas: make(map[string]any), ServicePrincipal: make(map[string]any), diff --git a/bundle/internal/tf/schema/resource_job.go b/bundle/internal/tf/schema/resource_job.go index 42b648b0f..c89eafab9 100644 --- a/bundle/internal/tf/schema/resource_job.go +++ b/bundle/internal/tf/schema/resource_job.go @@ -1448,6 +1448,7 @@ type ResourceJobWebhookNotifications struct { type ResourceJob struct { AlwaysRunning bool `json:"always_running,omitempty"` + BudgetPolicyId string `json:"budget_policy_id,omitempty"` ControlRunState bool `json:"control_run_state,omitempty"` Description string `json:"description,omitempty"` EditMode string `json:"edit_mode,omitempty"` diff --git a/bundle/internal/tf/schema/resource_online_table.go b/bundle/internal/tf/schema/resource_online_table.go index de671eade..58d6f4ba5 100644 --- a/bundle/internal/tf/schema/resource_online_table.go +++ b/bundle/internal/tf/schema/resource_online_table.go @@ -19,9 +19,10 @@ type ResourceOnlineTableSpec struct { } type ResourceOnlineTable struct { - Id string `json:"id,omitempty"` - Name string `json:"name"` - Status []any 
`json:"status,omitempty"` - TableServingUrl string `json:"table_serving_url,omitempty"` - Spec *ResourceOnlineTableSpec `json:"spec,omitempty"` + Id string `json:"id,omitempty"` + Name string `json:"name"` + Status []any `json:"status,omitempty"` + TableServingUrl string `json:"table_serving_url,omitempty"` + UnityCatalogProvisioningState string `json:"unity_catalog_provisioning_state,omitempty"` + Spec *ResourceOnlineTableSpec `json:"spec,omitempty"` } diff --git a/bundle/internal/tf/schema/resource_pipeline.go b/bundle/internal/tf/schema/resource_pipeline.go index 1bed91fcb..2cb459aba 100644 --- a/bundle/internal/tf/schema/resource_pipeline.go +++ b/bundle/internal/tf/schema/resource_pipeline.go @@ -142,10 +142,26 @@ type ResourcePipelineGatewayDefinition struct { GatewayStorageSchema string `json:"gateway_storage_schema,omitempty"` } +type ResourcePipelineIngestionDefinitionObjectsReportTableConfiguration struct { + PrimaryKeys []string `json:"primary_keys,omitempty"` + SalesforceIncludeFormulaFields bool `json:"salesforce_include_formula_fields,omitempty"` + ScdType string `json:"scd_type,omitempty"` + SequenceBy []string `json:"sequence_by,omitempty"` +} + +type ResourcePipelineIngestionDefinitionObjectsReport struct { + DestinationCatalog string `json:"destination_catalog,omitempty"` + DestinationSchema string `json:"destination_schema,omitempty"` + DestinationTable string `json:"destination_table,omitempty"` + SourceUrl string `json:"source_url,omitempty"` + TableConfiguration *ResourcePipelineIngestionDefinitionObjectsReportTableConfiguration `json:"table_configuration,omitempty"` +} + type ResourcePipelineIngestionDefinitionObjectsSchemaTableConfiguration struct { PrimaryKeys []string `json:"primary_keys,omitempty"` SalesforceIncludeFormulaFields bool `json:"salesforce_include_formula_fields,omitempty"` ScdType string `json:"scd_type,omitempty"` + SequenceBy []string `json:"sequence_by,omitempty"` } type ResourcePipelineIngestionDefinitionObjectsSchema struct { @@ -160,6 +176,7 @@ type ResourcePipelineIngestionDefinitionObjectsTableTableConfiguration struct { PrimaryKeys []string `json:"primary_keys,omitempty"` SalesforceIncludeFormulaFields bool `json:"salesforce_include_formula_fields,omitempty"` ScdType string `json:"scd_type,omitempty"` + SequenceBy []string `json:"sequence_by,omitempty"` } type ResourcePipelineIngestionDefinitionObjectsTable struct { @@ -173,6 +190,7 @@ type ResourcePipelineIngestionDefinitionObjectsTable struct { } type ResourcePipelineIngestionDefinitionObjects struct { + Report *ResourcePipelineIngestionDefinitionObjectsReport `json:"report,omitempty"` Schema *ResourcePipelineIngestionDefinitionObjectsSchema `json:"schema,omitempty"` Table *ResourcePipelineIngestionDefinitionObjectsTable `json:"table,omitempty"` } @@ -181,6 +199,7 @@ type ResourcePipelineIngestionDefinitionTableConfiguration struct { PrimaryKeys []string `json:"primary_keys,omitempty"` SalesforceIncludeFormulaFields bool `json:"salesforce_include_formula_fields,omitempty"` ScdType string `json:"scd_type,omitempty"` + SequenceBy []string `json:"sequence_by,omitempty"` } type ResourcePipelineIngestionDefinition struct { diff --git a/bundle/internal/tf/schema/root.go b/bundle/internal/tf/schema/root.go index 7a0cc01f9..bf4283c9b 100644 --- a/bundle/internal/tf/schema/root.go +++ b/bundle/internal/tf/schema/root.go @@ -21,7 +21,7 @@ type Root struct { const ProviderHost = "registry.terraform.io" const ProviderSource = "databricks/databricks" -const ProviderVersion = "1.53.0" +const 
ProviderVersion = "1.54.0"

 func NewRoot() *Root {
 	return &Root{
diff --git a/bundle/libraries/local_path.go b/bundle/libraries/local_path.go
index 6d60d56bc..53b714104 100644
--- a/bundle/libraries/local_path.go
+++ b/bundle/libraries/local_path.go
@@ -57,6 +57,12 @@ func IsLibraryLocal(dep string) bool {
 		}
 	}
 
+	// If the dependency contains a pip flag option, such as --extra-index-url, it's a valid
+	// entry for environment dependencies but not a local path
+	if containsPipFlag(dep) {
+		return false
+	}
+
 	// If the dependency is a requirements file, it's not a valid local path
 	if strings.HasPrefix(dep, "-r") {
 		return false
@@ -70,6 +76,11 @@ func IsLibraryLocal(dep string) bool {
 	return IsLocalPath(dep)
 }
 
+func containsPipFlag(input string) bool {
+	re := regexp.MustCompile(`--[a-zA-Z0-9-]+`)
+	return re.MatchString(input)
+}
+
 // ^[a-zA-Z0-9\-_]+: Matches the package name, allowing alphanumeric characters, dashes (-), and underscores (_).
 // \[.*\])?: Optionally matches any extras specified in square brackets, e.g., [security].
 // ((==|!=|<=|>=|~=|>|<)\d+(\.\d+){0,2}(\.\*)?): Optionally matches version specifiers, supporting various operators (==, !=, etc.) followed by a version number (e.g., 2.25.1).
diff --git a/bundle/libraries/workspace_path.go b/bundle/libraries/path.go
similarity index 78%
rename from bundle/libraries/workspace_path.go
rename to bundle/libraries/path.go
index 126ad3f13..3bad40fac 100644
--- a/bundle/libraries/workspace_path.go
+++ b/bundle/libraries/path.go
@@ -36,3 +36,12 @@ func IsWorkspaceLibrary(library *compute.Library) bool {
 
 	return IsWorkspacePath(path)
 }
+
+// IsVolumesPath returns true if the specified path is a Unity Catalog Volumes path.
+func IsVolumesPath(path string) bool {
+	return strings.HasPrefix(path, "/Volumes/")
+}
+
+func IsWorkspaceSharedPath(path string) bool {
+	return strings.HasPrefix(path, "/Workspace/Shared/")
+}
diff --git a/bundle/libraries/workspace_path_test.go b/bundle/libraries/path_test.go
similarity index 77%
rename from bundle/libraries/workspace_path_test.go
rename to bundle/libraries/path_test.go
index feaaab7f7..90fe187a2 100644
--- a/bundle/libraries/workspace_path_test.go
+++ b/bundle/libraries/path_test.go
@@ -31,3 +31,13 @@ func TestIsWorkspaceLibrary(t *testing.T) {
 	// Empty.
 	assert.False(t, IsWorkspaceLibrary(&compute.Library{}))
 }
+
+func TestIsVolumesPath(t *testing.T) {
+	// Absolute paths with particular prefixes.
+	assert.True(t, IsVolumesPath("/Volumes/path/to/package"))
+
+	// Relative paths.
+	assert.False(t, IsVolumesPath("myfile.txt"))
+	assert.False(t, IsVolumesPath("./myfile.txt"))
+	assert.False(t, IsVolumesPath("../myfile.txt"))
+}
diff --git a/bundle/permissions/validate.go b/bundle/permissions/validate.go
index acd2e6062..f1a18f430 100644
--- a/bundle/permissions/validate.go
+++ b/bundle/permissions/validate.go
@@ -3,9 +3,9 @@ package permissions
 import (
 	"context"
 	"fmt"
-	"strings"
 
 	"github.com/databricks/cli/bundle"
+	"github.com/databricks/cli/bundle/libraries"
 	"github.com/databricks/cli/libs/diag"
 )
 
@@ -21,17 +21,13 @@ func (*validateSharedRootPermissions) Name() string {
 }
 
 func (*validateSharedRootPermissions) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
-	if isWorkspaceSharedRoot(b.Config.Workspace.RootPath) {
+	if libraries.IsWorkspaceSharedPath(b.Config.Workspace.RootPath) {
 		return isUsersGroupPermissionSet(b)
 	}
 
 	return nil
 }
 
-func isWorkspaceSharedRoot(path string) bool {
-	return strings.HasPrefix(path, "/Workspace/Shared/")
-}
-
 // isUsersGroupPermissionSet checks that top-level permissions set for bundle contain group_name: users with CAN_MANAGE permission.
 func isUsersGroupPermissionSet(b *bundle.Bundle) diag.Diagnostics {
 	var diags diag.Diagnostics
diff --git a/bundle/permissions/workspace_path_permissions.go b/bundle/permissions/workspace_path_permissions.go
new file mode 100644
index 000000000..a3b4424c1
--- /dev/null
+++ b/bundle/permissions/workspace_path_permissions.go
@@ -0,0 +1,89 @@
+package permissions
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/databricks/cli/bundle/config/resources"
+	"github.com/databricks/cli/libs/diag"
+	"github.com/databricks/databricks-sdk-go/service/workspace"
+)
+
+type WorkspacePathPermissions struct {
+	Path        string
+	Permissions []resources.Permission
+}
+
+func ObjectAclToResourcePermissions(path string, acl []workspace.WorkspaceObjectAccessControlResponse) *WorkspacePathPermissions {
+	permissions := make([]resources.Permission, 0)
+	for _, a := range acl {
+		// Skip the admin group because it's added to all resources by default.
+		if a.GroupName == "admins" {
+			continue
+		}
+
+		for _, pl := range a.AllPermissions {
+			permissions = append(permissions, resources.Permission{
+				Level:                convertWorkspaceObjectPermissionLevel(pl.PermissionLevel),
+				GroupName:            a.GroupName,
+				UserName:             a.UserName,
+				ServicePrincipalName: a.ServicePrincipalName,
+			})
+		}
+	}
+
+	return &WorkspacePathPermissions{Permissions: permissions, Path: path}
+}
+
+func (p WorkspacePathPermissions) Compare(perms []resources.Permission) diag.Diagnostics {
+	var diags diag.Diagnostics
+
+	// Check the permissions in the workspace and see if they are all set in the bundle.
+	ok, missing := containsAll(p.Permissions, perms)
+	if !ok {
+		diags = diags.Append(diag.Diagnostic{
+			Severity: diag.Warning,
+			Summary:  "untracked permissions apply to target workspace path",
+			Detail:   fmt.Sprintf("The following permissions apply to the workspace folder at %q but are not configured in the bundle:\n%s", p.Path, toString(missing)),
+		})
+	}
+
+	return diags
+}
+
+// containsAll checks if all permissions in permA are also present in permB.
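+// The second return value lists the permissions from permA that were not found in permB.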
+func containsAll(permA []resources.Permission, permB []resources.Permission) (bool, []resources.Permission) { + missing := make([]resources.Permission, 0) + for _, a := range permA { + found := false + for _, b := range permB { + if a == b { + found = true + break + } + } + if !found { + missing = append(missing, a) + } + } + return len(missing) == 0, missing +} + +// convertWorkspaceObjectPermissionLevel converts matching object permission levels to bundle ones. +// If there is no matching permission level, it returns permission level as is, for example, CAN_EDIT. +func convertWorkspaceObjectPermissionLevel(level workspace.WorkspaceObjectPermissionLevel) string { + switch level { + case workspace.WorkspaceObjectPermissionLevelCanRead: + return CAN_VIEW + default: + return string(level) + } +} + +func toString(p []resources.Permission) string { + var sb strings.Builder + for _, perm := range p { + sb.WriteString(fmt.Sprintf("- %s\n", perm.String())) + } + return sb.String() +} diff --git a/bundle/permissions/workspace_path_permissions_test.go b/bundle/permissions/workspace_path_permissions_test.go new file mode 100644 index 000000000..0bb00474c --- /dev/null +++ b/bundle/permissions/workspace_path_permissions_test.go @@ -0,0 +1,121 @@ +package permissions + +import ( + "testing" + + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/databricks-sdk-go/service/workspace" + "github.com/stretchr/testify/require" +) + +func TestWorkspacePathPermissionsCompare(t *testing.T) { + testCases := []struct { + perms []resources.Permission + acl []workspace.WorkspaceObjectAccessControlResponse + expected diag.Diagnostics + }{ + { + perms: []resources.Permission{ + {Level: CAN_MANAGE, UserName: "foo@bar.com"}, + }, + acl: []workspace.WorkspaceObjectAccessControlResponse{ + { + UserName: "foo@bar.com", + AllPermissions: []workspace.WorkspaceObjectPermission{ + {PermissionLevel: "CAN_MANAGE"}, + }, + }, + }, + expected: nil, + }, + { + perms: []resources.Permission{ + {Level: CAN_MANAGE, UserName: "foo@bar.com"}, + }, + acl: []workspace.WorkspaceObjectAccessControlResponse{ + { + UserName: "foo@bar.com", + AllPermissions: []workspace.WorkspaceObjectPermission{ + {PermissionLevel: "CAN_MANAGE"}, + }, + }, + { + GroupName: "admins", + AllPermissions: []workspace.WorkspaceObjectPermission{ + {PermissionLevel: "CAN_MANAGE"}, + }, + }, + }, + expected: nil, + }, + { + perms: []resources.Permission{ + {Level: CAN_VIEW, UserName: "foo@bar.com"}, + {Level: CAN_MANAGE, ServicePrincipalName: "sp.com"}, + }, + acl: []workspace.WorkspaceObjectAccessControlResponse{ + { + UserName: "foo@bar.com", + AllPermissions: []workspace.WorkspaceObjectPermission{ + {PermissionLevel: "CAN_READ"}, + }, + }, + }, + expected: nil, + }, + { + perms: []resources.Permission{ + {Level: CAN_MANAGE, UserName: "foo@bar.com"}, + }, + acl: []workspace.WorkspaceObjectAccessControlResponse{ + { + UserName: "foo@bar.com", + AllPermissions: []workspace.WorkspaceObjectPermission{ + {PermissionLevel: "CAN_MANAGE"}, + }, + }, + { + GroupName: "foo", + AllPermissions: []workspace.WorkspaceObjectPermission{ + {PermissionLevel: "CAN_MANAGE"}, + }, + }, + }, + expected: diag.Diagnostics{ + { + Severity: diag.Warning, + Summary: "untracked permissions apply to target workspace path", + Detail: "The following permissions apply to the workspace folder at \"path\" but are not configured in the bundle:\n- level: CAN_MANAGE, group_name: foo\n", + }, + }, + }, + { + perms: 
[]resources.Permission{ + {Level: CAN_MANAGE, UserName: "foo@bar.com"}, + }, + acl: []workspace.WorkspaceObjectAccessControlResponse{ + { + UserName: "foo2@bar.com", + AllPermissions: []workspace.WorkspaceObjectPermission{ + {PermissionLevel: "CAN_MANAGE"}, + }, + }, + }, + expected: diag.Diagnostics{ + { + Severity: diag.Warning, + Summary: "untracked permissions apply to target workspace path", + Detail: "The following permissions apply to the workspace folder at \"path\" but are not configured in the bundle:\n- level: CAN_MANAGE, user_name: foo2@bar.com\n", + }, + }, + }, + } + + for _, tc := range testCases { + wp := ObjectAclToResourcePermissions("path", tc.acl) + diags := wp.Compare(tc.perms) + require.Equal(t, tc.expected, diags) + } + +} diff --git a/bundle/permissions/workspace_root.go b/bundle/permissions/workspace_root.go index e7867521e..93a90ed9c 100644 --- a/bundle/permissions/workspace_root.go +++ b/bundle/permissions/workspace_root.go @@ -34,7 +34,7 @@ func giveAccessForWorkspaceRoot(ctx context.Context, b *bundle.Bundle) error { permissions := make([]workspace.WorkspaceObjectAccessControlRequest, 0) for _, p := range b.Config.Permissions { - level, err := getWorkspaceObjectPermissionLevel(p.Level) + level, err := GetWorkspaceObjectPermissionLevel(p.Level) if err != nil { return err } @@ -65,7 +65,7 @@ func giveAccessForWorkspaceRoot(ctx context.Context, b *bundle.Bundle) error { return err } -func getWorkspaceObjectPermissionLevel(bundlePermission string) (workspace.WorkspaceObjectPermissionLevel, error) { +func GetWorkspaceObjectPermissionLevel(bundlePermission string) (workspace.WorkspaceObjectPermissionLevel, error) { switch bundlePermission { case CAN_MANAGE: return workspace.WorkspaceObjectPermissionLevelCanManage, nil diff --git a/bundle/resources/completion.go b/bundle/resources/completion.go new file mode 100644 index 000000000..c1bcd022f --- /dev/null +++ b/bundle/resources/completion.go @@ -0,0 +1,17 @@ +package resources + +import "github.com/databricks/cli/bundle" + +// Completions returns the same as [References] except +// that every key maps directly to a single reference. +func Completions(b *bundle.Bundle, filters ...Filter) map[string]Reference { + out := make(map[string]Reference) + keyOnlyRefs, _ := References(b, filters...) + for k, refs := range keyOnlyRefs { + if len(refs) != 1 { + continue + } + out[k] = refs[0] + } + return out +} diff --git a/bundle/resources/completion_test.go b/bundle/resources/completion_test.go new file mode 100644 index 000000000..2f7942aae --- /dev/null +++ b/bundle/resources/completion_test.go @@ -0,0 +1,58 @@ +package resources + +import ( + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/resources" + "github.com/stretchr/testify/assert" +) + +func TestCompletions_SkipDuplicates(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "foo": {}, + "bar": {}, + }, + Pipelines: map[string]*resources.Pipeline{ + "foo": {}, + }, + }, + }, + } + + // Test that this skips duplicates and only includes unambiguous completions. 
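+	// "foo" is used by both a job and a pipeline, so only "bar" resolves unambiguously.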
+ out := Completions(b) + if assert.Len(t, out, 1) { + assert.Contains(t, out, "bar") + } +} + +func TestCompletions_Filter(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "foo": {}, + }, + Pipelines: map[string]*resources.Pipeline{ + "bar": {}, + }, + }, + }, + } + + includeJobs := func(ref Reference) bool { + _, ok := ref.Resource.(*resources.Job) + return ok + } + + // Test that this does not include the pipeline. + out := Completions(b, includeJobs) + if assert.Len(t, out, 1) { + assert.Contains(t, out, "foo") + } +} diff --git a/bundle/resources/lookup.go b/bundle/resources/lookup.go new file mode 100644 index 000000000..48d862230 --- /dev/null +++ b/bundle/resources/lookup.go @@ -0,0 +1,98 @@ +package resources + +import ( + "fmt" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" +) + +// Reference is a reference to a resource. +// It includes the resource type description, and a reference to the resource itself. +type Reference struct { + // Key is the unique key of the resource, e.g. "my_job". + Key string + + // KeyWithType is the unique key of the resource, including the resource type, e.g. "jobs.my_job". + KeyWithType string + + // Description is the resource type description. + Description config.ResourceDescription + + // Resource is the resource itself. + Resource config.ConfigResource +} + +// Map is the core type for resource lookup and completion. +type Map map[string][]Reference + +// Filter defines the function signature for filtering resources. +type Filter func(Reference) bool + +// includeReference checks if the specified reference passes all filters. +// If the list of filters is empty, the reference is always included. +func includeReference(filters []Filter, ref Reference) bool { + for _, filter := range filters { + if !filter(ref) { + return false + } + } + return true +} + +// References returns maps of resource keys to a slice of [Reference]. +// +// The first map is indexed by the resource key only. +// The second map is indexed by the resource type name and its key. +// +// While the return types allows for multiple resources to share the same key, +// this is confirmed not to happen in the [validate.UniqueResourceKeys] mutator. +func References(b *bundle.Bundle, filters ...Filter) (Map, Map) { + keyOnly := make(Map) + keyWithType := make(Map) + + // Collect map of resource references indexed by their keys. + for _, group := range b.Config.Resources.AllResources() { + for k, v := range group.Resources { + ref := Reference{ + Key: k, + KeyWithType: fmt.Sprintf("%s.%s", group.Description.PluralName, k), + Description: group.Description, + Resource: v, + } + + // Skip resources that do not pass all filters. + if !includeReference(filters, ref) { + continue + } + + keyOnly[ref.Key] = append(keyOnly[ref.Key], ref) + keyWithType[ref.KeyWithType] = append(keyWithType[ref.KeyWithType], ref) + } + } + + return keyOnly, keyWithType +} + +// Lookup returns the resource with the specified key. +// If the key maps to more than one resource, an error is returned. +// If the key does not map to any resource, an error is returned. +func Lookup(b *bundle.Bundle, key string, filters ...Filter) (Reference, error) { + keyOnlyRefs, keyWithTypeRefs := References(b, filters...) 
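+	// Try the bare key first; fall back to the type-qualified form, e.g. "jobs.my_job".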
+ refs, ok := keyOnlyRefs[key] + if !ok { + refs, ok = keyWithTypeRefs[key] + if !ok { + return Reference{}, fmt.Errorf("resource with key %q not found", key) + } + } + + switch { + case len(refs) == 1: + return refs[0], nil + case len(refs) > 1: + return Reference{}, fmt.Errorf("multiple resources with key %q found", key) + default: + panic("unreachable") + } +} diff --git a/bundle/resources/lookup_test.go b/bundle/resources/lookup_test.go new file mode 100644 index 000000000..b2eaafd1a --- /dev/null +++ b/bundle/resources/lookup_test.go @@ -0,0 +1,117 @@ +package resources + +import ( + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/databricks-sdk-go/service/jobs" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestLookup_EmptyBundle(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{}, + }, + } + + _, err := Lookup(b, "foo") + require.Error(t, err) + assert.ErrorContains(t, err, "resource with key \"foo\" not found") +} + +func TestLookup_NotFound(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "foo": {}, + "bar": {}, + }, + }, + }, + } + + _, err := Lookup(b, "qux") + require.Error(t, err) + assert.ErrorContains(t, err, `resource with key "qux" not found`) +} + +func TestLookup_MultipleFound(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "foo": {}, + }, + Pipelines: map[string]*resources.Pipeline{ + "foo": {}, + }, + }, + }, + } + + _, err := Lookup(b, "foo") + require.Error(t, err) + assert.ErrorContains(t, err, `multiple resources with key "foo" found`) +} + +func TestLookup_Nominal(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "foo": { + JobSettings: &jobs.JobSettings{ + Name: "Foo job", + }, + }, + }, + }, + }, + } + + // Lookup by key only. + out, err := Lookup(b, "foo") + if assert.NoError(t, err) { + assert.Equal(t, "Foo job", out.Resource.GetName()) + } + + // Lookup by type and key. + out, err = Lookup(b, "jobs.foo") + if assert.NoError(t, err) { + assert.Equal(t, "Foo job", out.Resource.GetName()) + } +} + +func TestLookup_NominalWithFilters(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "foo": {}, + }, + Pipelines: map[string]*resources.Pipeline{ + "bar": {}, + }, + }, + }, + } + + includeJobs := func(ref Reference) bool { + _, ok := ref.Resource.(*resources.Job) + return ok + } + + // This should succeed because the filter includes jobs. + _, err := Lookup(b, "foo", includeJobs) + require.NoError(t, err) + + // This should fail because the filter excludes pipelines. + _, err = Lookup(b, "bar", includeJobs) + require.Error(t, err) + assert.ErrorContains(t, err, `resource with key "bar" not found`) +} diff --git a/bundle/run/job.go b/bundle/run/job.go index 8003c7d29..340af961c 100644 --- a/bundle/run/job.go +++ b/bundle/run/job.go @@ -317,6 +317,29 @@ func (r *jobRunner) Cancel(ctx context.Context) error { return errGroup.Wait() } +func (r *jobRunner) Restart(ctx context.Context, opts *Options) (output.RunOutput, error) { + // We don't need to cancel existing runs if the job is continuous and unpaused. 
+ // the /jobs/run-now API will automatically cancel any existing runs before starting a new one. + // + // /jobs/run-now will not cancel existing runs if the job is continuous and paused. + // New job runs will be queued instead and will wait for existing runs to finish. + // In this case, we need to cancel the existing runs before starting a new one. + continuous := r.job.JobSettings.Continuous + if continuous != nil && continuous.PauseStatus == jobs.PauseStatusUnpaused { + return r.Run(ctx, opts) + } + + s := cmdio.Spinner(ctx) + s <- "Cancelling all active job runs" + err := r.Cancel(ctx) + close(s) + if err != nil { + return nil, err + } + + return r.Run(ctx, opts) +} + func (r *jobRunner) ParseArgs(args []string, opts *Options) error { return r.posArgsHandler().ParseArgs(args, opts) } diff --git a/bundle/run/job_test.go b/bundle/run/job_test.go index be189306b..369c546aa 100644 --- a/bundle/run/job_test.go +++ b/bundle/run/job_test.go @@ -1,6 +1,7 @@ package run import ( + "bytes" "context" "testing" "time" @@ -8,6 +9,8 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/flags" "github.com/databricks/databricks-sdk-go/experimental/mocks" "github.com/databricks/databricks-sdk-go/service/jobs" "github.com/stretchr/testify/mock" @@ -126,3 +129,132 @@ func TestJobRunnerCancelWithNoActiveRuns(t *testing.T) { err := runner.Cancel(context.Background()) require.NoError(t, err) } + +func TestJobRunnerRestart(t *testing.T) { + for _, jobSettings := range []*jobs.JobSettings{ + {}, + { + Continuous: &jobs.Continuous{ + PauseStatus: jobs.PauseStatusPaused, + }, + }, + } { + job := &resources.Job{ + ID: "123", + JobSettings: jobSettings, + } + b := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "test_job": job, + }, + }, + }, + } + + runner := jobRunner{key: "test", bundle: b, job: job} + + m := mocks.NewMockWorkspaceClient(t) + b.SetWorkpaceClient(m.WorkspaceClient) + ctx := context.Background() + ctx = cmdio.InContext(ctx, cmdio.NewIO(flags.OutputText, &bytes.Buffer{}, &bytes.Buffer{}, &bytes.Buffer{}, "", "")) + ctx = cmdio.NewContext(ctx, cmdio.NewLogger(flags.ModeAppend)) + + jobApi := m.GetMockJobsAPI() + jobApi.EXPECT().ListRunsAll(mock.Anything, jobs.ListRunsRequest{ + ActiveOnly: true, + JobId: 123, + }).Return([]jobs.BaseRun{ + {RunId: 1}, + {RunId: 2}, + }, nil) + + // Mock the runner cancelling existing job runs. 
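+		// Restart should cancel both active runs before triggering a new one.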
+ mockWait := &jobs.WaitGetRunJobTerminatedOrSkipped[struct{}]{ + Poll: func(time time.Duration, f func(j *jobs.Run)) (*jobs.Run, error) { + return nil, nil + }, + } + jobApi.EXPECT().CancelRun(mock.Anything, jobs.CancelRun{ + RunId: 1, + }).Return(mockWait, nil) + jobApi.EXPECT().CancelRun(mock.Anything, jobs.CancelRun{ + RunId: 2, + }).Return(mockWait, nil) + + // Mock the runner triggering a job run + mockWaitForRun := &jobs.WaitGetRunJobTerminatedOrSkipped[jobs.RunNowResponse]{ + Poll: func(d time.Duration, f func(*jobs.Run)) (*jobs.Run, error) { + return &jobs.Run{ + State: &jobs.RunState{ + ResultState: jobs.RunResultStateSuccess, + }, + }, nil + }, + } + jobApi.EXPECT().RunNow(mock.Anything, jobs.RunNow{ + JobId: 123, + }).Return(mockWaitForRun, nil) + + // Mock the runner getting the job output + jobApi.EXPECT().GetRun(mock.Anything, jobs.GetRunRequest{}).Return(&jobs.Run{}, nil) + + _, err := runner.Restart(ctx, &Options{}) + require.NoError(t, err) + } +} + +func TestJobRunnerRestartForContinuousUnpausedJobs(t *testing.T) { + job := &resources.Job{ + ID: "123", + JobSettings: &jobs.JobSettings{ + Continuous: &jobs.Continuous{ + PauseStatus: jobs.PauseStatusUnpaused, + }, + }, + } + b := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "test_job": job, + }, + }, + }, + } + + runner := jobRunner{key: "test", bundle: b, job: job} + + m := mocks.NewMockWorkspaceClient(t) + b.SetWorkpaceClient(m.WorkspaceClient) + ctx := context.Background() + ctx = cmdio.InContext(ctx, cmdio.NewIO(flags.OutputText, &bytes.Buffer{}, &bytes.Buffer{}, &bytes.Buffer{}, "", "...")) + ctx = cmdio.NewContext(ctx, cmdio.NewLogger(flags.ModeAppend)) + + jobApi := m.GetMockJobsAPI() + + // The runner should not try and cancel existing job runs for unpaused continuous jobs. + jobApi.AssertNotCalled(t, "ListRunsAll") + jobApi.AssertNotCalled(t, "CancelRun") + + // Mock the runner triggering a job run + mockWaitForRun := &jobs.WaitGetRunJobTerminatedOrSkipped[jobs.RunNowResponse]{ + Poll: func(d time.Duration, f func(*jobs.Run)) (*jobs.Run, error) { + return &jobs.Run{ + State: &jobs.RunState{ + ResultState: jobs.RunResultStateSuccess, + }, + }, nil + }, + } + jobApi.EXPECT().RunNow(mock.Anything, jobs.RunNow{ + JobId: 123, + }).Return(mockWaitForRun, nil) + + // Mock the runner getting the job output + jobApi.EXPECT().GetRun(mock.Anything, jobs.GetRunRequest{}).Return(&jobs.Run{}, nil) + + _, err := runner.Restart(ctx, &Options{}) + require.NoError(t, err) +} diff --git a/bundle/run/keys.go b/bundle/run/keys.go deleted file mode 100644 index 76ec50ac8..000000000 --- a/bundle/run/keys.go +++ /dev/null @@ -1,69 +0,0 @@ -package run - -import ( - "fmt" - - "github.com/databricks/cli/bundle" - "golang.org/x/exp/maps" -) - -// RunnerLookup maps identifiers to a list of workloads that match that identifier. -// The list can have more than 1 entry if resources of different types use the -// same key. When this happens, the user should disambiguate between them. 
-type RunnerLookup map[string][]Runner - -// ResourceKeys computes a map with -func ResourceKeys(b *bundle.Bundle) (keyOnly RunnerLookup, keyWithType RunnerLookup) { - keyOnly = make(RunnerLookup) - keyWithType = make(RunnerLookup) - - r := b.Config.Resources - for k, v := range r.Jobs { - kt := fmt.Sprintf("jobs.%s", k) - w := jobRunner{key: key(kt), bundle: b, job: v} - keyOnly[k] = append(keyOnly[k], &w) - keyWithType[kt] = append(keyWithType[kt], &w) - } - for k, v := range r.Pipelines { - kt := fmt.Sprintf("pipelines.%s", k) - w := pipelineRunner{key: key(kt), bundle: b, pipeline: v} - keyOnly[k] = append(keyOnly[k], &w) - keyWithType[kt] = append(keyWithType[kt], &w) - } - return -} - -// ResourceCompletionMap returns a map of resource keys to their respective names. -func ResourceCompletionMap(b *bundle.Bundle) map[string]string { - out := make(map[string]string) - keyOnly, keyWithType := ResourceKeys(b) - - // Keep track of resources we have seen by their fully qualified key. - seen := make(map[string]bool) - - // First add resources that can be identified by key alone. - for k, v := range keyOnly { - // Invariant: len(v) >= 1. See [ResourceKeys]. - if len(v) == 1 { - seen[v[0].Key()] = true - out[k] = v[0].Name() - } - } - - // Then add resources that can only be identified by their type and key. - for k, v := range keyWithType { - // Invariant: len(v) == 1. See [ResourceKeys]. - _, ok := seen[v[0].Key()] - if ok { - continue - } - out[k] = v[0].Name() - } - - return out -} - -// ResourceCompletions returns a list of keys that unambiguously reference resources in the bundle. -func ResourceCompletions(b *bundle.Bundle) []string { - return maps.Keys(ResourceCompletionMap(b)) -} diff --git a/bundle/run/keys_test.go b/bundle/run/keys_test.go deleted file mode 100644 index 5ab73b13d..000000000 --- a/bundle/run/keys_test.go +++ /dev/null @@ -1,25 +0,0 @@ -package run - -import ( - "testing" - - "github.com/databricks/cli/bundle" - "github.com/databricks/cli/bundle/config" - "github.com/databricks/cli/bundle/config/resources" - "github.com/stretchr/testify/assert" -) - -func TestResourceCompletionsUnique(t *testing.T) { - b := &bundle.Bundle{ - Config: config.Root{ - Resources: config.Resources{ - Jobs: map[string]*resources.Job{ - "foo": {}, - "bar": {}, - }, - }, - }, - } - - assert.ElementsMatch(t, []string{"foo", "bar"}, ResourceCompletions(b)) -} diff --git a/bundle/run/pipeline.go b/bundle/run/pipeline.go index d684f8388..ffe012843 100644 --- a/bundle/run/pipeline.go +++ b/bundle/run/pipeline.go @@ -183,6 +183,18 @@ func (r *pipelineRunner) Cancel(ctx context.Context) error { return err } +func (r *pipelineRunner) Restart(ctx context.Context, opts *Options) (output.RunOutput, error) { + s := cmdio.Spinner(ctx) + s <- "Cancelling the active pipeline update" + err := r.Cancel(ctx) + close(s) + if err != nil { + return nil, err + } + + return r.Run(ctx, opts) +} + func (r *pipelineRunner) ParseArgs(args []string, opts *Options) error { if len(args) == 0 { return nil diff --git a/bundle/run/pipeline_test.go b/bundle/run/pipeline_test.go index 29b57ffdb..e4608061c 100644 --- a/bundle/run/pipeline_test.go +++ b/bundle/run/pipeline_test.go @@ -1,6 +1,7 @@ package run import ( + "bytes" "context" "testing" "time" @@ -8,8 +9,12 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/flags" + sdk_config 
"github.com/databricks/databricks-sdk-go/config" "github.com/databricks/databricks-sdk-go/experimental/mocks" "github.com/databricks/databricks-sdk-go/service/pipelines" + "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" ) @@ -47,3 +52,68 @@ func TestPipelineRunnerCancel(t *testing.T) { err := runner.Cancel(context.Background()) require.NoError(t, err) } + +func TestPipelineRunnerRestart(t *testing.T) { + pipeline := &resources.Pipeline{ + ID: "123", + } + + b := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Pipelines: map[string]*resources.Pipeline{ + "test_pipeline": pipeline, + }, + }, + }, + } + + runner := pipelineRunner{key: "test", bundle: b, pipeline: pipeline} + + m := mocks.NewMockWorkspaceClient(t) + m.WorkspaceClient.Config = &sdk_config.Config{ + Host: "https://test.com", + } + b.SetWorkpaceClient(m.WorkspaceClient) + ctx := context.Background() + ctx = cmdio.InContext(ctx, cmdio.NewIO(flags.OutputText, &bytes.Buffer{}, &bytes.Buffer{}, &bytes.Buffer{}, "", "...")) + ctx = cmdio.NewContext(ctx, cmdio.NewLogger(flags.ModeAppend)) + + mockWait := &pipelines.WaitGetPipelineIdle[struct{}]{ + Poll: func(time.Duration, func(*pipelines.GetPipelineResponse)) (*pipelines.GetPipelineResponse, error) { + return nil, nil + }, + } + + pipelineApi := m.GetMockPipelinesAPI() + pipelineApi.EXPECT().Stop(mock.Anything, pipelines.StopRequest{ + PipelineId: "123", + }).Return(mockWait, nil) + + pipelineApi.EXPECT().GetByPipelineId(mock.Anything, "123").Return(&pipelines.GetPipelineResponse{}, nil) + + // Mock runner starting a new update + pipelineApi.EXPECT().StartUpdate(mock.Anything, pipelines.StartUpdate{ + PipelineId: "123", + }).Return(&pipelines.StartUpdateResponse{ + UpdateId: "456", + }, nil) + + // Mock runner polling for events + pipelineApi.EXPECT().ListPipelineEventsAll(mock.Anything, pipelines.ListPipelineEventsRequest{ + Filter: `update_id = '456'`, + MaxResults: 100, + PipelineId: "123", + }).Return([]pipelines.PipelineEvent{}, nil) + + // Mock runner polling for update status + pipelineApi.EXPECT().GetUpdateByPipelineIdAndUpdateId(mock.Anything, "123", "456"). + Return(&pipelines.GetUpdateResponse{ + Update: &pipelines.UpdateInfo{ + State: pipelines.UpdateInfoStateCompleted, + }, + }, nil) + + _, err := runner.Restart(ctx, &Options{}) + require.NoError(t, err) +} diff --git a/bundle/run/runner.go b/bundle/run/runner.go index 0f202ce7d..4c907d068 100644 --- a/bundle/run/runner.go +++ b/bundle/run/runner.go @@ -3,9 +3,10 @@ package run import ( "context" "fmt" - "strings" "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config/resources" + refs "github.com/databricks/cli/bundle/resources" "github.com/databricks/cli/bundle/run/output" ) @@ -27,6 +28,10 @@ type Runner interface { // Run the underlying worklow. Run(ctx context.Context, opts *Options) (output.RunOutput, error) + // Restart the underlying workflow by cancelling any existing runs before + // starting a new one. + Restart(ctx context.Context, opts *Options) (output.RunOutput, error) + // Cancel the underlying workflow. Cancel(ctx context.Context) error @@ -34,34 +39,24 @@ type Runner interface { argsHandler } -// Find locates a runner matching the specified argument. -// -// Its behavior is as follows: -// 1. Try to find a resource with identical to the argument. -// 2. Try to find a resource with . identical to the argument. -// -// If an argument resolves to multiple resources, it returns an error. 
-func Find(b *bundle.Bundle, arg string) (Runner, error) { - keyOnly, keyWithType := ResourceKeys(b) - if len(keyWithType) == 0 { - return nil, fmt.Errorf("bundle defines no resources") +// IsRunnable returns a filter that only allows runnable resources. +func IsRunnable(ref refs.Reference) bool { + switch ref.Resource.(type) { + case *resources.Job, *resources.Pipeline: + return true + default: + return false + } +} + +// ToRunner converts a resource reference to a runnable resource. +func ToRunner(b *bundle.Bundle, ref refs.Reference) (Runner, error) { + switch resource := ref.Resource.(type) { + case *resources.Job: + return &jobRunner{key: key(ref.KeyWithType), bundle: b, job: resource}, nil + case *resources.Pipeline: + return &pipelineRunner{key: key(ref.KeyWithType), bundle: b, pipeline: resource}, nil + default: + return nil, fmt.Errorf("unsupported resource type: %T", resource) } - - runners, ok := keyOnly[arg] - if !ok { - runners, ok = keyWithType[arg] - if !ok { - return nil, fmt.Errorf("no such resource: %s", arg) - } - } - - if len(runners) != 1 { - var keys []string - for _, runner := range runners { - keys = append(keys, runner.Key()) - } - return nil, fmt.Errorf("ambiguous: %s (can resolve to all of %s)", arg, strings.Join(keys, ", ")) - } - - return runners[0], nil } diff --git a/bundle/run/runner_test.go b/bundle/run/runner_test.go index 85baa192f..2fc5fa6fa 100644 --- a/bundle/run/runner_test.go +++ b/bundle/run/runner_test.go @@ -3,82 +3,14 @@ package run import ( "testing" - "github.com/databricks/cli/bundle" - "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/resources" + refs "github.com/databricks/cli/bundle/resources" "github.com/stretchr/testify/assert" ) -func TestFindNoResources(t *testing.T) { - b := &bundle.Bundle{ - Config: config.Root{ - Resources: config.Resources{}, - }, - } - - _, err := Find(b, "foo") - assert.ErrorContains(t, err, "bundle defines no resources") -} - -func TestFindSingleArg(t *testing.T) { - b := &bundle.Bundle{ - Config: config.Root{ - Resources: config.Resources{ - Jobs: map[string]*resources.Job{ - "foo": {}, - }, - }, - }, - } - - _, err := Find(b, "foo") - assert.NoError(t, err) -} - -func TestFindSingleArgNotFound(t *testing.T) { - b := &bundle.Bundle{ - Config: config.Root{ - Resources: config.Resources{ - Jobs: map[string]*resources.Job{ - "foo": {}, - }, - }, - }, - } - - _, err := Find(b, "bar") - assert.ErrorContains(t, err, "no such resource: bar") -} - -func TestFindSingleArgAmbiguous(t *testing.T) { - b := &bundle.Bundle{ - Config: config.Root{ - Resources: config.Resources{ - Jobs: map[string]*resources.Job{ - "key": {}, - }, - Pipelines: map[string]*resources.Pipeline{ - "key": {}, - }, - }, - }, - } - - _, err := Find(b, "key") - assert.ErrorContains(t, err, "ambiguous: ") -} - -func TestFindSingleArgWithType(t *testing.T) { - b := &bundle.Bundle{ - Config: config.Root{ - Resources: config.Resources{ - Jobs: map[string]*resources.Job{ - "key": {}, - }, - }, - }, - } - - _, err := Find(b, "jobs.key") - assert.NoError(t, err) +func TestRunner_IsRunnable(t *testing.T) { + assert.True(t, IsRunnable(refs.Reference{Resource: &resources.Job{}})) + assert.True(t, IsRunnable(refs.Reference{Resource: &resources.Pipeline{}})) + assert.False(t, IsRunnable(refs.Reference{Resource: &resources.MlflowModel{}})) + assert.False(t, IsRunnable(refs.Reference{Resource: &resources.MlflowExperiment{}})) } diff --git a/bundle/schema/jsonschema.json b/bundle/schema/jsonschema.json index 06b9cc15a..178656fe0 
100644 --- a/bundle/schema/jsonschema.json +++ b/bundle/schema/jsonschema.json @@ -209,6 +209,10 @@ { "type": "object", "properties": { + "budget_policy_id": { + "description": "The id of the user specified budget policy to use for this job.\nIf not specified, a default budget policy may be applied when creating or modifying the job.\nSee `effective_budget_policy_id` for the budget policy used by this workload.", + "$ref": "#/$defs/string" + }, "continuous": { "description": "An optional continuous property for this job. The continuous property will ensure that there is always one run executing. Only one of `schedule` and `continuous` can be used.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.Continuous" @@ -3901,6 +3905,10 @@ { "type": "object", "properties": { + "report": { + "description": "Select tables from a specific source report.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.ReportSpec" + }, "schema": { "description": "Select tables from a specific source schema.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.SchemaSpec" @@ -4233,6 +4241,40 @@ } ] }, + "pipelines.ReportSpec": { + "anyOf": [ + { + "type": "object", + "properties": { + "destination_catalog": { + "description": "Required. Destination catalog to store table.", + "$ref": "#/$defs/string" + }, + "destination_schema": { + "description": "Required. Destination schema to store table.", + "$ref": "#/$defs/string" + }, + "destination_table": { + "description": "Required. Destination table name. The pipeline fails if a table with that name already exists.", + "$ref": "#/$defs/string" + }, + "source_url": { + "description": "Required. Report URL in the source system.", + "$ref": "#/$defs/string" + }, + "table_configuration": { + "description": "Configuration settings to control the ingestion of tables. These settings override the table_configuration defined in the IngestionPipelineDefinition object.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.TableSpecificConfig" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, "pipelines.SchemaSpec": { "anyOf": [ { @@ -4281,7 +4323,7 @@ "$ref": "#/$defs/string" }, "destination_table": { - "description": "Optional. Destination table name. The pipeline fails If a table with that name already exists. If not set, the source table name is used.", + "description": "Optional. Destination table name. The pipeline fails if a table with that name already exists. If not set, the source table name is used.", "$ref": "#/$defs/string" }, "source_catalog": { @@ -4329,6 +4371,10 @@ "SCD_TYPE_1", "SCD_TYPE_2" ] + }, + "sequence_by": { + "description": "The column names specifying the logical order of events in the source data. 
Delta Live Tables uses this sequencing to handle change events that arrive out of order.", + "$ref": "#/$defs/slice/string" } }, "additionalProperties": false diff --git a/cmd/bundle/bundle.go b/cmd/bundle/bundle.go index 0880c9c44..fb88cd7d0 100644 --- a/cmd/bundle/bundle.go +++ b/cmd/bundle/bundle.go @@ -27,5 +27,6 @@ func New() *cobra.Command { cmd.AddCommand(newGenerateCommand()) cmd.AddCommand(newDebugCommand()) cmd.AddCommand(deployment.NewDeploymentCommand()) + cmd.AddCommand(newOpenCommand()) return cmd } diff --git a/cmd/bundle/open.go b/cmd/bundle/open.go new file mode 100644 index 000000000..a2ad32fd8 --- /dev/null +++ b/cmd/bundle/open.go @@ -0,0 +1,144 @@ +package bundle + +import ( + "context" + "errors" + "fmt" + "os" + "path/filepath" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config/mutator" + "github.com/databricks/cli/bundle/deploy/terraform" + "github.com/databricks/cli/bundle/phases" + "github.com/databricks/cli/bundle/resources" + "github.com/databricks/cli/cmd/bundle/utils" + "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/libs/cmdio" + "github.com/spf13/cobra" + "golang.org/x/exp/maps" + + "github.com/pkg/browser" +) + +func promptOpenArgument(ctx context.Context, b *bundle.Bundle) (string, error) { + // Compute map of "Human readable name of resource" -> "resource key". + inv := make(map[string]string) + for k, ref := range resources.Completions(b) { + title := fmt.Sprintf("%s: %s", ref.Description.SingularTitle, ref.Resource.GetName()) + inv[title] = k + } + + key, err := cmdio.Select(ctx, inv, "Resource to open") + if err != nil { + return "", err + } + + return key, nil +} + +func resolveOpenArgument(ctx context.Context, b *bundle.Bundle, args []string) (string, error) { + // If no arguments are specified, prompt the user to select the resource to open. + if len(args) == 0 && cmdio.IsPromptSupported(ctx) { + return promptOpenArgument(ctx, b) + } + + if len(args) < 1 { + return "", fmt.Errorf("expected a KEY of the resource to open") + } + + return args[0], nil +} + +func newOpenCommand() *cobra.Command { + cmd := &cobra.Command{ + Use: "open", + Short: "Open a resource in the browser", + Args: root.MaximumNArgs(1), + } + + var forcePull bool + cmd.Flags().BoolVar(&forcePull, "force-pull", false, "Skip local cache and load the state from the remote workspace") + + cmd.RunE = func(cmd *cobra.Command, args []string) error { + ctx := cmd.Context() + b, diags := utils.ConfigureBundleWithVariables(cmd) + if err := diags.Error(); err != nil { + return diags.Error() + } + + diags = bundle.Apply(ctx, b, phases.Initialize()) + if err := diags.Error(); err != nil { + return err + } + + arg, err := resolveOpenArgument(ctx, b, args) + if err != nil { + return err + } + + cacheDir, err := terraform.Dir(ctx, b) + if err != nil { + return err + } + _, stateFileErr := os.Stat(filepath.Join(cacheDir, terraform.TerraformStateFileName)) + _, configFileErr := os.Stat(filepath.Join(cacheDir, terraform.TerraformConfigFileName)) + noCache := errors.Is(stateFileErr, os.ErrNotExist) || errors.Is(configFileErr, os.ErrNotExist) + + if forcePull || noCache { + diags = bundle.Apply(ctx, b, bundle.Seq( + terraform.StatePull(), + terraform.Interpolate(), + terraform.Write(), + )) + if err := diags.Error(); err != nil { + return err + } + } + + diags = bundle.Apply(ctx, b, bundle.Seq( + terraform.Load(), + mutator.InitializeURLs(), + )) + if err := diags.Error(); err != nil { + return err + } + + // Locate resource to open. 
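
The cache check in the `open` command above decides when Terraform state must be pulled from the workspace before URLs can be resolved: if either the local state file or the generated config is missing, the command behaves as if `--force-pull` had been passed. A standalone sketch of that predicate, assuming the conventional file names behind `terraform.TerraformStateFileName` and `terraform.TerraformConfigFileName`:

```go
package main

import (
	"errors"
	"fmt"
	"io/fs"
	"os"
	"path/filepath"
)

// needsStatePull reports whether the local Terraform cache is incomplete
// and remote state must be pulled before resources can be resolved.
func needsStatePull(cacheDir string, forcePull bool) bool {
	_, stateErr := os.Stat(filepath.Join(cacheDir, "terraform.tfstate")) // assumed file name
	_, configErr := os.Stat(filepath.Join(cacheDir, "bundle.tf.json"))   // assumed file name
	noCache := errors.Is(stateErr, fs.ErrNotExist) || errors.Is(configErr, fs.ErrNotExist)
	return forcePull || noCache
}

func main() {
	fmt.Println(needsStatePull(os.TempDir(), false))
}
```
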
+		ref, err := resources.Lookup(b, arg)
+		if err != nil {
+			return err
+		}
+
+		// Confirm that the resource has a URL.
+		url := ref.Resource.GetURL()
+		if url == "" {
+			return fmt.Errorf("resource does not have a URL associated with it (has it been deployed?)")
+		}
+
+		return browser.OpenURL(url)
+	}
+
+	cmd.ValidArgsFunction = func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) {
+		b, diags := root.MustConfigureBundle(cmd)
+		if err := diags.Error(); err != nil {
+			cobra.CompErrorln(err.Error())
+			return nil, cobra.ShellCompDirectiveError
+		}
+
+		// No completion when we are not in the context of a bundle.
+		// Resource keys are only known once the bundle is configured.
+		if b == nil {
+			return nil, cobra.ShellCompDirectiveNoFileComp
+		}
+
+		if len(args) == 0 {
+			completions := resources.Completions(b)
+			return maps.Keys(completions), cobra.ShellCompDirectiveNoFileComp
+		} else {
+			return nil, cobra.ShellCompDirectiveNoFileComp
+		}
+	}
+
+	return cmd
+}
diff --git a/cmd/bundle/run.go b/cmd/bundle/run.go
index 9ef5eb8ff..96851d0c0 100644
--- a/cmd/bundle/run.go
+++ b/cmd/bundle/run.go
@@ -1,20 +1,69 @@
 package bundle
 
 import (
+	"context"
 	"encoding/json"
 	"fmt"
 
 	"github.com/databricks/cli/bundle"
 	"github.com/databricks/cli/bundle/deploy/terraform"
 	"github.com/databricks/cli/bundle/phases"
+	"github.com/databricks/cli/bundle/resources"
 	"github.com/databricks/cli/bundle/run"
+	"github.com/databricks/cli/bundle/run/output"
 	"github.com/databricks/cli/cmd/bundle/utils"
 	"github.com/databricks/cli/cmd/root"
 	"github.com/databricks/cli/libs/cmdio"
 	"github.com/databricks/cli/libs/flags"
 	"github.com/spf13/cobra"
+	"golang.org/x/exp/maps"
 )
 
+func promptRunArgument(ctx context.Context, b *bundle.Bundle) (string, error) {
+	// Compute map of "Human readable name of resource" -> "resource key".
+	inv := make(map[string]string)
+	for k, ref := range resources.Completions(b, run.IsRunnable) {
+		title := fmt.Sprintf("%s: %s", ref.Description.SingularTitle, ref.Resource.GetName())
+		inv[title] = k
+	}
+
+	key, err := cmdio.Select(ctx, inv, "Resource to run")
+	if err != nil {
+		return "", err
+	}
+
+	return key, nil
+}
+
+func resolveRunArgument(ctx context.Context, b *bundle.Bundle, args []string) (string, error) {
+	// If no arguments are specified, prompt the user to select something to run.
+	if len(args) == 0 && cmdio.IsPromptSupported(ctx) {
+		return promptRunArgument(ctx, b)
+	}
+
+	if len(args) < 1 {
+		return "", fmt.Errorf("expected a KEY of the resource to run")
+	}
+
+	return args[0], nil
+}
+
+func keyToRunner(b *bundle.Bundle, arg string) (run.Runner, error) {
+	// Locate the resource to run.
+	ref, err := resources.Lookup(b, arg, run.IsRunnable)
+	if err != nil {
+		return nil, err
+	}
+
+	// Convert the resource to a runnable resource.
+	runner, err := run.ToRunner(b, ref)
+	if err != nil {
+		return nil, err
+	}
+
+	return runner, nil
+}
+
 func newRunCommand() *cobra.Command {
 	cmd := &cobra.Command{
 		Use:   "run [flags] KEY",
@@ -60,22 +109,9 @@ task or a Python wheel task, the second example applies.
 			return err
 		}
 
-		// If no arguments are specified, prompt the user to select something to run.
-		if len(args) == 0 && cmdio.IsPromptSupported(ctx) {
-			// Invert completions from KEY -> NAME, to NAME -> KEY.
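
The prompting logic deleted just below moves into `resolveRunArgument` above; the pattern it follows is small enough to state on its own: prompt only when running interactively with no argument, otherwise require exactly one key. A generic sketch (names are illustrative):

```go
package main

import (
	"errors"
	"fmt"
)

// resolveArgument prompts when running interactively with no argument,
// and otherwise requires the caller to pass a key explicitly.
func resolveArgument(args []string, interactive bool, prompt func() (string, error)) (string, error) {
	if len(args) == 0 && interactive {
		return prompt()
	}
	if len(args) < 1 {
		return "", errors.New("expected a KEY of the resource to run")
	}
	return args[0], nil
}

func main() {
	key, err := resolveArgument(nil, true, func() (string, error) { return "jobs.my_job", nil })
	fmt.Println(key, err)
}
```
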
- inv := make(map[string]string) - for k, v := range run.ResourceCompletionMap(b) { - inv[v] = k - } - id, err := cmdio.Select(ctx, inv, "Resource to run") - if err != nil { - return err - } - args = append(args, id) - } - - if len(args) < 1 { - return fmt.Errorf("expected a KEY of the resource to run") + arg, err := resolveRunArgument(ctx, b, args) + if err != nil { + return err } diags = bundle.Apply(ctx, b, bundle.Seq( @@ -88,7 +124,7 @@ task or a Python wheel task, the second example applies. return err } - runner, err := run.Find(b, args[0]) + runner, err := keyToRunner(b, arg) if err != nil { return err } @@ -100,19 +136,16 @@ task or a Python wheel task, the second example applies. } runOptions.NoWait = noWait + var output output.RunOutput if restart { - s := cmdio.Spinner(ctx) - s <- "Cancelling all runs" - err := runner.Cancel(ctx) - close(s) - if err != nil { - return err - } + output, err = runner.Restart(ctx, &runOptions) + } else { + output, err = runner.Run(ctx, &runOptions) } - output, err := runner.Run(ctx, &runOptions) if err != nil { return err } + if output != nil { switch root.OutputType(cmd) { case flags.OutputText: @@ -148,10 +181,11 @@ task or a Python wheel task, the second example applies. } if len(args) == 0 { - return run.ResourceCompletions(b), cobra.ShellCompDirectiveNoFileComp + completions := resources.Completions(b, run.IsRunnable) + return maps.Keys(completions), cobra.ShellCompDirectiveNoFileComp } else { // If we know the resource to run, we can complete additional positional arguments. - runner, err := run.Find(b, args[0]) + runner, err := keyToRunner(b, args[0]) if err != nil { return nil, cobra.ShellCompDirectiveError } diff --git a/cmd/bundle/sync.go b/cmd/bundle/sync.go index df3e087c2..274bba0e0 100644 --- a/cmd/bundle/sync.go +++ b/cmd/bundle/sync.go @@ -1,7 +1,9 @@ package bundle import ( + "context" "fmt" + "io" "time" "github.com/databricks/cli/bundle" @@ -9,6 +11,7 @@ import ( "github.com/databricks/cli/bundle/phases" "github.com/databricks/cli/cmd/bundle/utils" "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/libs/flags" "github.com/databricks/cli/libs/log" "github.com/databricks/cli/libs/sync" "github.com/spf13/cobra" @@ -18,6 +21,7 @@ type syncFlags struct { interval time.Duration full bool watch bool + output flags.Output } func (f *syncFlags) syncOptionsFromBundle(cmd *cobra.Command, b *bundle.Bundle) (*sync.SyncOptions, error) { @@ -26,6 +30,21 @@ func (f *syncFlags) syncOptionsFromBundle(cmd *cobra.Command, b *bundle.Bundle) return nil, fmt.Errorf("cannot get sync options: %w", err) } + if f.output != "" { + var outputFunc func(context.Context, <-chan sync.Event, io.Writer) + switch f.output { + case flags.OutputText: + outputFunc = sync.TextOutput + case flags.OutputJSON: + outputFunc = sync.JsonOutput + } + if outputFunc != nil { + opts.OutputHandler = func(ctx context.Context, c <-chan sync.Event) { + outputFunc(ctx, c, cmd.OutOrStdout()) + } + } + } + opts.Full = f.full opts.PollInterval = f.interval return opts, nil @@ -42,6 +61,7 @@ func newSyncCommand() *cobra.Command { cmd.Flags().DurationVar(&f.interval, "interval", 1*time.Second, "file system polling interval (for --watch)") cmd.Flags().BoolVar(&f.full, "full", false, "perform full synchronization (default is incremental)") cmd.Flags().BoolVar(&f.watch, "watch", false, "watch local file system for changes") + cmd.Flags().Var(&f.output, "output", "type of the output format") cmd.RunE = func(cmd *cobra.Command, args []string) error { ctx := cmd.Context() @@ -65,6 
+85,7 @@ func newSyncCommand() *cobra.Command { if err != nil { return err } + defer s.Close() log.Infof(ctx, "Remote file sync location: %v", opts.RemotePath) diff --git a/cmd/workspace/apps/apps.go b/cmd/workspace/apps/apps.go index 4cee2f82a..9331ddc2e 100755 --- a/cmd/workspace/apps/apps.go +++ b/cmd/workspace/apps/apps.go @@ -28,9 +28,6 @@ func New() *cobra.Command { Annotations: map[string]string{ "package": "apps", }, - - // This service is being previewed; hide from help output. - Hidden: true, } // Add methods diff --git a/cmd/workspace/disable-legacy-dbfs/disable-legacy-dbfs.go b/cmd/workspace/disable-legacy-dbfs/disable-legacy-dbfs.go new file mode 100755 index 000000000..d09755370 --- /dev/null +++ b/cmd/workspace/disable-legacy-dbfs/disable-legacy-dbfs.go @@ -0,0 +1,220 @@ +// Code generated from OpenAPI specs by Databricks SDK Generator. DO NOT EDIT. + +package disable_legacy_dbfs + +import ( + "fmt" + + "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/flags" + "github.com/databricks/databricks-sdk-go/service/settings" + "github.com/spf13/cobra" +) + +// Slice with functions to override default command behavior. +// Functions can be added from the `init()` function in manually curated files in this directory. +var cmdOverrides []func(*cobra.Command) + +func New() *cobra.Command { + cmd := &cobra.Command{ + Use: "disable-legacy-dbfs", + Short: `When this setting is on, access to DBFS root and DBFS mounts is disallowed (as well as creation of new mounts).`, + Long: `When this setting is on, access to DBFS root and DBFS mounts is disallowed (as + well as creation of new mounts). When the setting is off, all DBFS + functionality is enabled`, + + // This service is being previewed; hide from help output. + Hidden: true, + } + + // Add methods + cmd.AddCommand(newDelete()) + cmd.AddCommand(newGet()) + cmd.AddCommand(newUpdate()) + + // Apply optional overrides to this command. + for _, fn := range cmdOverrides { + fn(cmd) + } + + return cmd +} + +// start delete command + +// Slice with functions to override default command behavior. +// Functions can be added from the `init()` function in manually curated files in this directory. +var deleteOverrides []func( + *cobra.Command, + *settings.DeleteDisableLegacyDbfsRequest, +) + +func newDelete() *cobra.Command { + cmd := &cobra.Command{} + + var deleteReq settings.DeleteDisableLegacyDbfsRequest + + // TODO: short flags + + cmd.Flags().StringVar(&deleteReq.Etag, "etag", deleteReq.Etag, `etag used for versioning.`) + + cmd.Use = "delete" + cmd.Short = `Delete the disable legacy DBFS setting.` + cmd.Long = `Delete the disable legacy DBFS setting. + + Deletes the disable legacy DBFS setting for a workspace, reverting back to the + default.` + + cmd.Annotations = make(map[string]string) + + cmd.Args = func(cmd *cobra.Command, args []string) error { + check := root.ExactArgs(0) + return check(cmd, args) + } + + cmd.PreRunE = root.MustWorkspaceClient + cmd.RunE = func(cmd *cobra.Command, args []string) (err error) { + ctx := cmd.Context() + w := root.WorkspaceClient(ctx) + + response, err := w.Settings.DisableLegacyDbfs().Delete(ctx, deleteReq) + if err != nil { + return err + } + return cmdio.Render(ctx, response) + } + + // Disable completions since they are not applicable. + // Can be overridden by manual implementation in `override.go`. + cmd.ValidArgsFunction = cobra.NoFileCompletions + + // Apply optional overrides to this command. 
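
Every generated command in this file ends the same way: a package-level slice of override functions that hand-written files in the same directory append to from `init()`, which the constructor then replays over the finished command. A compilable sketch of that hook pattern in isolation (assumes `github.com/spf13/cobra` is available in the module):

```go
package main

import (
	"fmt"

	"github.com/spf13/cobra"
)

// Overrides are appended from init() functions in hand-written files that
// live next to the generated code; the generator never touches them.
var cmdOverrides []func(*cobra.Command)

func newGenerated() *cobra.Command {
	cmd := &cobra.Command{Use: "example", Short: "generated default"}
	for _, fn := range cmdOverrides {
		fn(cmd)
	}
	return cmd
}

func main() {
	cmdOverrides = append(cmdOverrides, func(c *cobra.Command) {
		c.Short = "curated override"
	})
	fmt.Println(newGenerated().Short)
}
```
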
+ for _, fn := range deleteOverrides { + fn(cmd, &deleteReq) + } + + return cmd +} + +// start get command + +// Slice with functions to override default command behavior. +// Functions can be added from the `init()` function in manually curated files in this directory. +var getOverrides []func( + *cobra.Command, + *settings.GetDisableLegacyDbfsRequest, +) + +func newGet() *cobra.Command { + cmd := &cobra.Command{} + + var getReq settings.GetDisableLegacyDbfsRequest + + // TODO: short flags + + cmd.Flags().StringVar(&getReq.Etag, "etag", getReq.Etag, `etag used for versioning.`) + + cmd.Use = "get" + cmd.Short = `Get the disable legacy DBFS setting.` + cmd.Long = `Get the disable legacy DBFS setting. + + Gets the disable legacy DBFS setting.` + + cmd.Annotations = make(map[string]string) + + cmd.Args = func(cmd *cobra.Command, args []string) error { + check := root.ExactArgs(0) + return check(cmd, args) + } + + cmd.PreRunE = root.MustWorkspaceClient + cmd.RunE = func(cmd *cobra.Command, args []string) (err error) { + ctx := cmd.Context() + w := root.WorkspaceClient(ctx) + + response, err := w.Settings.DisableLegacyDbfs().Get(ctx, getReq) + if err != nil { + return err + } + return cmdio.Render(ctx, response) + } + + // Disable completions since they are not applicable. + // Can be overridden by manual implementation in `override.go`. + cmd.ValidArgsFunction = cobra.NoFileCompletions + + // Apply optional overrides to this command. + for _, fn := range getOverrides { + fn(cmd, &getReq) + } + + return cmd +} + +// start update command + +// Slice with functions to override default command behavior. +// Functions can be added from the `init()` function in manually curated files in this directory. +var updateOverrides []func( + *cobra.Command, + *settings.UpdateDisableLegacyDbfsRequest, +) + +func newUpdate() *cobra.Command { + cmd := &cobra.Command{} + + var updateReq settings.UpdateDisableLegacyDbfsRequest + var updateJson flags.JsonFlag + + // TODO: short flags + cmd.Flags().Var(&updateJson, "json", `either inline JSON string or @path/to/file.json with request body`) + + cmd.Use = "update" + cmd.Short = `Update the disable legacy DBFS setting.` + cmd.Long = `Update the disable legacy DBFS setting. + + Updates the disable legacy DBFS setting for the workspace.` + + cmd.Annotations = make(map[string]string) + + cmd.PreRunE = root.MustWorkspaceClient + cmd.RunE = func(cmd *cobra.Command, args []string) (err error) { + ctx := cmd.Context() + w := root.WorkspaceClient(ctx) + + if cmd.Flags().Changed("json") { + diags := updateJson.Unmarshal(&updateReq) + if diags.HasError() { + return diags.Error() + } + if len(diags) > 0 { + err := cmdio.RenderDiagnosticsToErrorOut(ctx, diags) + if err != nil { + return err + } + } + } else { + return fmt.Errorf("please provide command input in JSON format by specifying the --json flag") + } + + response, err := w.Settings.DisableLegacyDbfs().Update(ctx, updateReq) + if err != nil { + return err + } + return cmdio.Render(ctx, response) + } + + // Disable completions since they are not applicable. + // Can be overridden by manual implementation in `override.go`. + cmd.ValidArgsFunction = cobra.NoFileCompletions + + // Apply optional overrides to this command. 
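
The `update` subcommand accepts its request body only through `--json`, either inline or as `@path/to/file.json`. A minimal sketch of what decoding such a payload looks like; the struct below and its field names are assumptions for illustration, not the SDK's `settings.UpdateDisableLegacyDbfsRequest` definition:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// updateRequest approximates the shape expected by `update --json`;
// every field here is an assumed stand-in for the real request type.
type updateRequest struct {
	AllowMissing bool            `json:"allow_missing"` // assumed field
	FieldMask    string          `json:"field_mask"`    // assumed field
	Setting      json.RawMessage `json:"setting"`       // assumed field
}

func main() {
	payload := []byte(`{"allow_missing": true, "field_mask": "setting", "setting": {}}`)
	var req updateRequest
	if err := json.Unmarshal(payload, &req); err != nil {
		fmt.Println("invalid JSON input:", err)
		return
	}
	fmt.Printf("decoded: %+v\n", req)
}
```
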
+ for _, fn := range updateOverrides { + fn(cmd, &updateReq) + } + + return cmd +} + +// end service DisableLegacyDbfs diff --git a/cmd/workspace/jobs/jobs.go b/cmd/workspace/jobs/jobs.go index d4ceb0c28..9e8db43d0 100755 --- a/cmd/workspace/jobs/jobs.go +++ b/cmd/workspace/jobs/jobs.go @@ -1557,6 +1557,7 @@ func newSubmit() *cobra.Command { cmd.Flags().Var(&submitJson, "json", `either inline JSON string or @path/to/file.json with request body`) // TODO: array: access_control_list + cmd.Flags().StringVar(&submitReq.BudgetPolicyId, "budget-policy-id", submitReq.BudgetPolicyId, `The user specified id of the budget policy to use for this one-time run.`) // TODO: complex arg: email_notifications // TODO: array: environments // TODO: complex arg: git_source diff --git a/cmd/workspace/settings/settings.go b/cmd/workspace/settings/settings.go index aaeecf41b..31e6ceee4 100755 --- a/cmd/workspace/settings/settings.go +++ b/cmd/workspace/settings/settings.go @@ -9,6 +9,7 @@ import ( compliance_security_profile "github.com/databricks/cli/cmd/workspace/compliance-security-profile" default_namespace "github.com/databricks/cli/cmd/workspace/default-namespace" disable_legacy_access "github.com/databricks/cli/cmd/workspace/disable-legacy-access" + disable_legacy_dbfs "github.com/databricks/cli/cmd/workspace/disable-legacy-dbfs" enhanced_security_monitoring "github.com/databricks/cli/cmd/workspace/enhanced-security-monitoring" restrict_workspace_admins "github.com/databricks/cli/cmd/workspace/restrict-workspace-admins" ) @@ -33,6 +34,7 @@ func New() *cobra.Command { cmd.AddCommand(compliance_security_profile.New()) cmd.AddCommand(default_namespace.New()) cmd.AddCommand(disable_legacy_access.New()) + cmd.AddCommand(disable_legacy_dbfs.New()) cmd.AddCommand(enhanced_security_monitoring.New()) cmd.AddCommand(restrict_workspace_admins.New()) diff --git a/go.mod b/go.mod index 697205f33..9059b9637 100644 --- a/go.mod +++ b/go.mod @@ -7,7 +7,7 @@ toolchain go1.22.7 require ( github.com/Masterminds/semver/v3 v3.3.0 // MIT github.com/briandowns/spinner v1.23.1 // Apache 2.0 - github.com/databricks/databricks-sdk-go v0.48.0 // Apache 2.0 + github.com/databricks/databricks-sdk-go v0.49.0 // Apache 2.0 github.com/fatih/color v1.17.0 // MIT github.com/ghodss/yaml v1.0.0 // MIT + NOTICE github.com/google/uuid v1.6.0 // BSD-3-Clause diff --git a/go.sum b/go.sum index 03698b20a..f365fcbf6 100644 --- a/go.sum +++ b/go.sum @@ -32,8 +32,8 @@ github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGX github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/cyphar/filepath-securejoin v0.2.4 h1:Ugdm7cg7i6ZK6x3xDF1oEu1nfkyfH53EtKeQYTC3kyg= github.com/cyphar/filepath-securejoin v0.2.4/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4= -github.com/databricks/databricks-sdk-go v0.48.0 h1:46KtsnRo+FGhC3izUXbpL0PXBNomvsdignYDhJZlm9s= -github.com/databricks/databricks-sdk-go v0.48.0/go.mod h1:ds+zbv5mlQG7nFEU5ojLtgN/u0/9YzZmKQES/CfedzU= +github.com/databricks/databricks-sdk-go v0.49.0 h1:VBTeZZMLIuBSM4kxOCfUcW9z4FUQZY2QeNRD5qm9FUQ= +github.com/databricks/databricks-sdk-go v0.49.0/go.mod h1:ds+zbv5mlQG7nFEU5ojLtgN/u0/9YzZmKQES/CfedzU= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= diff --git a/internal/bugbash/README.md b/internal/bugbash/README.md 
new file mode 100644
index 000000000..941ab6227
--- /dev/null
+++ b/internal/bugbash/README.md
@@ -0,0 +1,13 @@
+# Bugbash
+
+The script in this directory can be used to conveniently exec into a shell
+where a CLI build for a specific branch is made available.
+
+## Usage
+
+This script warns if you do not have at least Bash 5 installed,
+but it works (without command completion) on earlier versions.
+
+```shell
+bash <(curl -fsSL https://raw.githubusercontent.com/databricks/cli/main/internal/bugbash/exec.sh) my-branch
+```
diff --git a/internal/bugbash/exec.sh b/internal/bugbash/exec.sh
new file mode 100755
index 000000000..ac25b16ed
--- /dev/null
+++ b/internal/bugbash/exec.sh
@@ -0,0 +1,130 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+# Set the GitHub repository for the Databricks CLI.
+export GH_REPO="databricks/cli"
+
+# Synthesize the directory name for the snapshot build.
+function cli_snapshot_directory() {
+  dir="cli"
+
+  # Append OS
+  case "$(uname -s)" in
+  Linux)
+    dir="${dir}_linux"
+    ;;
+  Darwin)
+    dir="${dir}_darwin"
+    ;;
+  *)
+    echo "Unknown operating system: $(uname -s)"
+    ;;
+  esac
+
+  # Append architecture
+  case "$(uname -m)" in
+  x86_64)
+    dir="${dir}_amd64_v1"
+    ;;
+  i386|i686)
+    dir="${dir}_386"
+    ;;
+  arm64|aarch64)
+    dir="${dir}_arm64"
+    ;;
+  armv7l|armv8l)
+    dir="${dir}_arm_6"
+    ;;
+  *)
+    echo "Unknown architecture: $(uname -m)"
+    ;;
+  esac
+
+  echo "$dir"
+}
+
+# Default to the main branch if no branch is specified.
+# (Use ${1:-} expansion so `set -u` does not abort when no argument is given.)
+BRANCH=${1:-main}
+
+# Check if the "gh" command is available.
+if ! command -v gh &> /dev/null; then
+  echo "The GitHub CLI (gh) is required to download the snapshot build."
+  echo "Install and configure it with:"
+  echo ""
+  echo "  brew install gh"
+  echo "  gh auth login"
+  echo ""
+  exit 1
+fi
+
+echo "Looking for a snapshot build of the Databricks CLI on branch $BRANCH..."
+
+# Find last successful build on $BRANCH.
+last_successful_run_id=$(
+  gh run list -b "$BRANCH" -w release-snapshot --json 'databaseId,conclusion' |
+    jq 'limit(1; .[] | select(.conclusion == "success")) | .databaseId'
+)
+if [ -z "$last_successful_run_id" ]; then
+  echo "Unable to find last successful build of the release-snapshot workflow for branch $BRANCH."
+  exit 1
+fi
+
+# Determine artifact name with the right binaries for this runner.
+case "$(uname -s)" in
+Linux)
+  artifact="cli_linux_snapshot"
+  ;;
+Darwin)
+  artifact="cli_darwin_snapshot"
+  ;;
+esac
+
+# Create a temporary directory to download the artifact.
+dir=$(mktemp -d)
+
+# Download the artifact.
+echo "Downloading the snapshot build..."
+gh run download "$last_successful_run_id" -n "$artifact" -D "$dir/.bin"
+dir="$dir/.bin/$(cli_snapshot_directory)"
+if [ ! -d "$dir" ]; then
+  echo "Directory does not exist: $dir"
+  exit 1
+fi
+
+# Make CLI available on $PATH.
+chmod +x "$dir/databricks"
+export PATH="$dir:$PATH"
+
+# Set the prompt to indicate the bugbash environment and exec.
+export PS1="(bugbash $BRANCH) \[\033[01;32m\]\u@\h\[\033[00m\]:\[\033[01;34m\]\w\[\033[00m\]\$ "
+
+# Display completion instructions.
+echo ""
+echo "=================================================================="
+
+if [[ ${BASH_VERSINFO[0]} -lt 5 ]]; then
+  echo -en "\033[31m"
+  echo "You have Bash version < 5 installed... completion won't work."
+ echo -en "\033[0m" + echo "" + echo "Install it with:" + echo "" + echo " brew install bash bash-completion" + echo "" + echo "==================================================================" +fi + +echo "" +echo "To load completions in your current shell session:" +echo "" +echo " source /opt/homebrew/etc/profile.d/bash_completion.sh" +echo " source <(databricks completion bash)" +echo "" +echo "==================================================================" +echo "" + +# Exec into a new shell. +# Note: don't use zsh because on macOS it _always_ overwrites PS1. +exec /usr/bin/env bash diff --git a/libs/git/repository.go b/libs/git/repository.go index 82ee7987b..0bbd57865 100644 --- a/libs/git/repository.go +++ b/libs/git/repository.go @@ -23,8 +23,21 @@ type Repository struct { // directory where we process .gitignore files. real bool - // root is the absolute path to the repository root. - root vfs.Path + // rootDir is the path to the root of the repository checkout. + // This can be either the main repository checkout or a worktree checkout. + // For more information about worktrees, see: https://git-scm.com/docs/git-worktree#_description. + rootDir vfs.Path + + // gitDir is the equivalent of $GIT_DIR and points to the + // `.git` directory of a repository or a worktree directory. + // See https://git-scm.com/docs/git-worktree#_details for more information. + gitDir vfs.Path + + // gitCommonDir is the equivalent of $GIT_COMMON_DIR and points to the + // `.git` directory of the main working tree (common between worktrees). + // This is equivalent to [gitDir] if this is the main working tree. + // See https://git-scm.com/docs/git-worktree#_details for more information. + gitCommonDir vfs.Path // ignore contains a list of ignore patterns indexed by the // path prefix relative to the repository root. @@ -44,12 +57,11 @@ type Repository struct { // Root returns the absolute path to the repository root. 
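
To make the `rootDir`/`gitDir`/`gitCommonDir` split above concrete, here is how the three paths typically relate for a main checkout and for a worktree created with `git worktree add` (illustrative paths, matching the layout the tests further below construct):

```go
package main

import "fmt"

// worktreePaths illustrates how the three fields introduced above relate
// for a repository cloned at /repo with a worktree at /repo/my_worktree.
type worktreePaths struct {
	rootDir      string // the checkout itself
	gitDir       string // $GIT_DIR: per-worktree metadata
	gitCommonDir string // $GIT_COMMON_DIR: shared with the main working tree
}

func main() {
	mainTree := worktreePaths{
		rootDir:      "/repo",
		gitDir:       "/repo/.git",
		gitCommonDir: "/repo/.git", // same as gitDir for the main working tree
	}
	worktree := worktreePaths{
		rootDir:      "/repo/my_worktree",
		gitDir:       "/repo/.git/worktrees/my_worktree",
		gitCommonDir: "/repo/.git",
	}
	fmt.Printf("main:     %+v\n", mainTree)
	fmt.Printf("worktree: %+v\n", worktree)
}
```
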
func (r *Repository) Root() string { - return r.root.Native() + return r.rootDir.Native() } func (r *Repository) CurrentBranch() (string, error) { - // load .git/HEAD - ref, err := LoadReferenceFile(r.root, path.Join(GitDirectoryName, "HEAD")) + ref, err := LoadReferenceFile(r.gitDir, "HEAD") if err != nil { return "", err } @@ -65,8 +77,7 @@ func (r *Repository) CurrentBranch() (string, error) { } func (r *Repository) LatestCommit() (string, error) { - // load .git/HEAD - ref, err := LoadReferenceFile(r.root, path.Join(GitDirectoryName, "HEAD")) + ref, err := LoadReferenceFile(r.gitDir, "HEAD") if err != nil { return "", err } @@ -80,12 +91,12 @@ func (r *Repository) LatestCommit() (string, error) { return ref.Content, nil } - // read reference from .git/HEAD + // Read reference from $GIT_DIR/HEAD branchHeadPath, err := ref.ResolvePath() if err != nil { return "", err } - branchHeadRef, err := LoadReferenceFile(r.root, path.Join(GitDirectoryName, branchHeadPath)) + branchHeadRef, err := LoadReferenceFile(r.gitCommonDir, branchHeadPath) if err != nil { return "", err } @@ -125,7 +136,7 @@ func (r *Repository) loadConfig() error { if err != nil { return fmt.Errorf("unable to load user specific gitconfig: %w", err) } - err = config.loadFile(r.root, ".git/config") + err = config.loadFile(r.gitCommonDir, "config") if err != nil { return fmt.Errorf("unable to load repository specific gitconfig: %w", err) } @@ -133,12 +144,6 @@ func (r *Repository) loadConfig() error { return nil } -// newIgnoreFile constructs a new [ignoreRules] implementation backed by -// a file using the specified path relative to the repository root. -func (r *Repository) newIgnoreFile(relativeIgnoreFilePath string) ignoreRules { - return newIgnoreFile(r.root, relativeIgnoreFilePath) -} - // getIgnoreRules returns a slice of [ignoreRules] that apply // for the specified prefix. The prefix must be cleaned by the caller. // It lazily initializes an entry for the specified prefix if it @@ -149,7 +154,7 @@ func (r *Repository) getIgnoreRules(prefix string) []ignoreRules { return fs } - r.ignore[prefix] = append(r.ignore[prefix], r.newIgnoreFile(path.Join(prefix, gitIgnoreFileName))) + r.ignore[prefix] = append(r.ignore[prefix], newIgnoreFile(r.rootDir, path.Join(prefix, gitIgnoreFileName))) return r.ignore[prefix] } @@ -205,21 +210,30 @@ func (r *Repository) Ignore(relPath string) (bool, error) { func NewRepository(path vfs.Path) (*Repository, error) { real := true - rootPath, err := vfs.FindLeafInTree(path, GitDirectoryName) + rootDir, err := vfs.FindLeafInTree(path, GitDirectoryName) if err != nil { if !errors.Is(err, fs.ErrNotExist) { return nil, err } // Cannot find `.git` directory. - // Treat the specified path as a potential repository root. + // Treat the specified path as a potential repository root checkout. real = false - rootPath = path + rootDir = path + } + + // Derive $GIT_DIR and $GIT_COMMON_DIR paths if this is a real repository. + // If it isn't a real repository, they'll point to the (non-existent) `.git` directory. + gitDir, gitCommonDir, err := resolveGitDirs(rootDir) + if err != nil { + return nil, err } repo := &Repository{ - real: real, - root: rootPath, - ignore: make(map[string][]ignoreRules), + real: real, + rootDir: rootDir, + gitDir: gitDir, + gitCommonDir: gitCommonDir, + ignore: make(map[string][]ignoreRules), } err = repo.loadConfig() @@ -253,9 +267,9 @@ func NewRepository(path vfs.Path) (*Repository, error) { ".git", }), // Load repository-wide exclude file. 
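
The HEAD handling above is the core of the worktree change: the symbolic `HEAD` file is per-worktree (`$GIT_DIR`), while the branch ref it names lives in the shared `$GIT_COMMON_DIR`. A stripped-down sketch of that two-step resolution (loose refs only; packed refs are out of scope for this illustration):

```go
package main

import (
	"fmt"
	"os"
	"path/filepath"
	"strings"
)

// latestCommit resolves HEAD the way the updated methods above do:
// read $GIT_DIR/HEAD, and if it is symbolic, follow the ref into
// $GIT_COMMON_DIR.
func latestCommit(gitDir, gitCommonDir string) (string, error) {
	head, err := os.ReadFile(filepath.Join(gitDir, "HEAD"))
	if err != nil {
		return "", err
	}
	content := strings.TrimSpace(string(head))
	ref, ok := strings.CutPrefix(content, "ref: ")
	if !ok {
		// Detached HEAD: the file contains the commit SHA directly.
		return content, nil
	}
	sha, err := os.ReadFile(filepath.Join(gitCommonDir, ref))
	if err != nil {
		return "", err
	}
	return strings.TrimSpace(string(sha)), nil
}

func main() {
	fmt.Println(latestCommit(".git", ".git"))
}
```
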
- repo.newIgnoreFile(".git/info/exclude"), + newIgnoreFile(repo.gitCommonDir, "info/exclude"), // Load root gitignore file. - repo.newIgnoreFile(".gitignore"), + newIgnoreFile(repo.rootDir, ".gitignore"), } return repo, nil diff --git a/libs/git/view.go b/libs/git/view.go index 90eed0bb8..2d2e39a60 100644 --- a/libs/git/view.go +++ b/libs/git/view.go @@ -80,7 +80,7 @@ func NewView(root vfs.Path) (*View, error) { // Target path must be relative to the repository root path. target := root.Native() - prefix := repo.root.Native() + prefix := repo.rootDir.Native() if !strings.HasPrefix(target, prefix) { return nil, fmt.Errorf("path %q is not within repository root %q", root.Native(), prefix) } diff --git a/libs/git/worktree.go b/libs/git/worktree.go new file mode 100644 index 000000000..964c1c95b --- /dev/null +++ b/libs/git/worktree.go @@ -0,0 +1,123 @@ +package git + +import ( + "bufio" + "errors" + "fmt" + "io/fs" + "path/filepath" + "strings" + + "github.com/databricks/cli/libs/vfs" +) + +func readLines(root vfs.Path, name string) ([]string, error) { + file, err := root.Open(name) + if err != nil { + return nil, err + } + + defer file.Close() + + var lines []string + scanner := bufio.NewScanner(file) + for scanner.Scan() { + lines = append(lines, scanner.Text()) + } + + return lines, scanner.Err() +} + +// readGitDir reads the value of the `.git` file in a worktree. +func readGitDir(root vfs.Path) (string, error) { + lines, err := readLines(root, GitDirectoryName) + if err != nil { + return "", err + } + + var gitDir string + for _, line := range lines { + parts := strings.SplitN(line, ": ", 2) + if len(parts) != 2 { + continue + } + + if parts[0] == "gitdir" { + gitDir = strings.TrimSpace(parts[1]) + } + } + + if gitDir == "" { + return "", fmt.Errorf(`expected %q to contain a line with "gitdir: [...]"`, filepath.Join(root.Native(), GitDirectoryName)) + } + + return gitDir, nil +} + +// readGitCommonDir reads the value of the `commondir` file in the `.git` directory of a worktree. +// This file typically contains "../.." to point to $GIT_COMMON_DIR. +func readGitCommonDir(gitDir vfs.Path) (string, error) { + lines, err := readLines(gitDir, "commondir") + if err != nil { + return "", err + } + + if len(lines) == 0 { + return "", errors.New("file is empty") + } + + return strings.TrimSpace(lines[0]), nil +} + +// resolveGitDirs resolves the paths for $GIT_DIR and $GIT_COMMON_DIR. +// The path argument is the root of the checkout where (supposedly) a `.git` file or directory exists. +func resolveGitDirs(root vfs.Path) (vfs.Path, vfs.Path, error) { + fileInfo, err := root.Stat(GitDirectoryName) + if err != nil { + // If the `.git` file or directory does not exist, then this is not a git repository. + // Return paths that we know don't exist, so we do not need to perform nil checks in the caller. + if errors.Is(err, fs.ErrNotExist) { + gitDir := vfs.MustNew(filepath.Join(root.Native(), GitDirectoryName)) + return gitDir, gitDir, nil + } + return nil, nil, err + } + + // If the path is a directory, then it is the main working tree. + // Both $GIT_DIR and $GIT_COMMON_DIR point to the same directory. + if fileInfo.IsDir() { + gitDir := vfs.MustNew(filepath.Join(root.Native(), GitDirectoryName)) + return gitDir, gitDir, nil + } + + // If the path is not a directory, then it is a worktree. + // Read value for $GIT_DIR. + gitDirValue, err := readGitDir(root) + if err != nil { + return nil, nil, err + } + + // Resolve $GIT_DIR. 
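
`resolveGitDirs` above branches on whether `.git` is a directory (main checkout) or a pointer file (worktree). The pointer parsing and relative-path handling reduce to a few lines; a standalone sketch under those assumptions:

```go
package main

import (
	"fmt"
	"path/filepath"
	"strings"
)

// resolvePointer resolves a "gitdir: ..." value from a worktree's `.git`
// file; relative values are interpreted against the worktree root.
func resolvePointer(worktreeRoot, line string) (string, error) {
	value, ok := strings.CutPrefix(line, "gitdir:")
	if !ok {
		return "", fmt.Errorf("expected a line with %q", "gitdir: [...]")
	}
	value = strings.TrimSpace(value)
	if filepath.IsAbs(value) {
		return value, nil
	}
	return filepath.Join(worktreeRoot, value), nil
}

func main() {
	// Prints /repo/.git/worktrees/my_worktree after cleaning the ".." segment.
	fmt.Println(resolvePointer("/repo/my_worktree", "gitdir: ../.git/worktrees/my_worktree"))
}
```
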
+ var gitDir vfs.Path + if filepath.IsAbs(gitDirValue) { + gitDir = vfs.MustNew(gitDirValue) + } else { + gitDir = vfs.MustNew(filepath.Join(root.Native(), gitDirValue)) + } + + // Read value for $GIT_COMMON_DIR. + gitCommonDirValue, err := readGitCommonDir(gitDir) + if err != nil { + return nil, nil, fmt.Errorf(`expected "commondir" file in worktree git folder at %q: %w`, gitDir.Native(), err) + } + + // Resolve $GIT_COMMON_DIR. + var gitCommonDir vfs.Path + if filepath.IsAbs(gitCommonDirValue) { + gitCommonDir = vfs.MustNew(gitCommonDirValue) + } else { + gitCommonDir = vfs.MustNew(filepath.Join(gitDir.Native(), gitCommonDirValue)) + } + + return gitDir, gitCommonDir, nil +} diff --git a/libs/git/worktree_test.go b/libs/git/worktree_test.go new file mode 100644 index 000000000..3d620c483 --- /dev/null +++ b/libs/git/worktree_test.go @@ -0,0 +1,108 @@ +package git + +import ( + "fmt" + "os" + "path/filepath" + "testing" + + "github.com/databricks/cli/libs/vfs" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func setupWorktree(t *testing.T) string { + var err error + + tmpDir := t.TempDir() + + // Checkout path + err = os.MkdirAll(filepath.Join(tmpDir, "my_worktree"), os.ModePerm) + require.NoError(t, err) + + // Main $GIT_COMMON_DIR + err = os.MkdirAll(filepath.Join(tmpDir, ".git"), os.ModePerm) + require.NoError(t, err) + + // Worktree $GIT_DIR + err = os.MkdirAll(filepath.Join(tmpDir, ".git/worktrees/my_worktree"), os.ModePerm) + require.NoError(t, err) + + return tmpDir +} + +func writeGitDir(t *testing.T, dir, content string) { + err := os.WriteFile(filepath.Join(dir, "my_worktree/.git"), []byte(content), os.ModePerm) + require.NoError(t, err) +} + +func writeGitCommonDir(t *testing.T, dir, content string) { + err := os.WriteFile(filepath.Join(dir, ".git/worktrees/my_worktree/commondir"), []byte(content), os.ModePerm) + require.NoError(t, err) +} + +func verifyCorrectDirs(t *testing.T, dir string) { + gitDir, gitCommonDir, err := resolveGitDirs(vfs.MustNew(filepath.Join(dir, "my_worktree"))) + require.NoError(t, err) + assert.Equal(t, filepath.Join(dir, ".git/worktrees/my_worktree"), gitDir.Native()) + assert.Equal(t, filepath.Join(dir, ".git"), gitCommonDir.Native()) +} + +func TestWorktreeResolveGitDir(t *testing.T) { + dir := setupWorktree(t) + writeGitCommonDir(t, dir, "../..") + + t.Run("relative", func(t *testing.T) { + writeGitDir(t, dir, fmt.Sprintf("gitdir: %s", "../.git/worktrees/my_worktree")) + verifyCorrectDirs(t, dir) + }) + + t.Run("absolute", func(t *testing.T) { + writeGitDir(t, dir, fmt.Sprintf("gitdir: %s", filepath.Join(dir, ".git/worktrees/my_worktree"))) + verifyCorrectDirs(t, dir) + }) + + t.Run("additional spaces", func(t *testing.T) { + writeGitDir(t, dir, fmt.Sprintf("gitdir: %s \n\n\n", "../.git/worktrees/my_worktree")) + verifyCorrectDirs(t, dir) + }) + + t.Run("empty", func(t *testing.T) { + writeGitDir(t, dir, "") + + _, _, err := resolveGitDirs(vfs.MustNew(filepath.Join(dir, "my_worktree"))) + assert.ErrorContains(t, err, ` to contain a line with "gitdir: [...]"`) + }) +} + +func TestWorktreeResolveCommonDir(t *testing.T) { + dir := setupWorktree(t) + writeGitDir(t, dir, fmt.Sprintf("gitdir: %s", "../.git/worktrees/my_worktree")) + + t.Run("relative", func(t *testing.T) { + writeGitCommonDir(t, dir, "../..") + verifyCorrectDirs(t, dir) + }) + + t.Run("absolute", func(t *testing.T) { + writeGitCommonDir(t, dir, filepath.Join(dir, ".git")) + verifyCorrectDirs(t, dir) + }) + + t.Run("additional spaces", func(t 
*testing.T) { + writeGitCommonDir(t, dir, " ../.. \n\n\n") + verifyCorrectDirs(t, dir) + }) + + t.Run("empty", func(t *testing.T) { + writeGitCommonDir(t, dir, "") + + _, _, err := resolveGitDirs(vfs.MustNew(filepath.Join(dir, "my_worktree"))) + assert.ErrorContains(t, err, `expected "commondir" file in worktree git folder at `) + }) + + t.Run("missing", func(t *testing.T) { + _, _, err := resolveGitDirs(vfs.MustNew(filepath.Join(dir, "my_worktree"))) + assert.ErrorContains(t, err, `expected "commondir" file in worktree git folder at `) + }) +} diff --git a/libs/process/opts.go b/libs/process/opts.go index 9516e49ba..dd0667516 100644 --- a/libs/process/opts.go +++ b/libs/process/opts.go @@ -6,6 +6,7 @@ import ( "fmt" "io" "os/exec" + "sync" ) type execOption func(context.Context, *exec.Cmd) error @@ -69,10 +70,24 @@ func WithStdoutWriter(dst io.Writer) execOption { } } +// safeWriter is a writer that is safe to use concurrently. +// It serializes writes to the underlying writer. +type safeWriter struct { + w io.Writer + m sync.Mutex +} + +func (s *safeWriter) Write(p []byte) (n int, err error) { + s.m.Lock() + defer s.m.Unlock() + return s.w.Write(p) +} + func WithCombinedOutput(buf *bytes.Buffer) execOption { + sw := &safeWriter{w: buf} return func(_ context.Context, c *exec.Cmd) error { - c.Stdout = io.MultiWriter(buf, c.Stdout) - c.Stderr = io.MultiWriter(buf, c.Stderr) + c.Stdout = io.MultiWriter(sw, c.Stdout) + c.Stderr = io.MultiWriter(sw, c.Stderr) return nil } } diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl index 50e5ad97c..1c6b8607e 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl @@ -3,7 +3,7 @@ resources: pipelines: {{.project_name}}_pipeline: name: {{.project_name}}_pipeline - {{- if eq default_catalog ""}} + {{- if or (eq default_catalog "") (eq default_catalog "hive_metastore")}} ## Specify the 'catalog' field to configure this pipeline to make use of Unity Catalog: # catalog: catalog_name {{- else}}
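
Stepping back to the `libs/process` change above: `exec.Cmd` copies stdout and stderr on separate goroutines, so the previous `io.MultiWriter` arrangement could have two goroutines writing into the same `bytes.Buffer` concurrently. A self-contained demonstration of the serialized wrapper:

```go
package main

import (
	"bytes"
	"fmt"
	"io"
	"sync"
)

// safeWriter mirrors the wrapper introduced above: it serializes Write
// calls so a shared buffer can absorb output from multiple goroutines.
type safeWriter struct {
	w io.Writer
	m sync.Mutex
}

func (s *safeWriter) Write(p []byte) (int, error) {
	s.m.Lock()
	defer s.m.Unlock()
	return s.w.Write(p)
}

func main() {
	var buf bytes.Buffer
	sw := &safeWriter{w: &buf}

	var wg sync.WaitGroup
	for i := 0; i < 2; i++ { // one goroutine per output stream
		wg.Add(1)
		go func(stream int) {
			defer wg.Done()
			for n := 0; n < 100; n++ {
				fmt.Fprintf(sw, "stream %d, line %d\n", stream, n)
			}
		}(i)
	}
	wg.Wait()
	fmt.Println("bytes captured:", buf.Len())
}
```
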