From 89c0af5bdcee82bc01a27930c86d550402eedc96 Mon Sep 17 00:00:00 2001 From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com> Date: Wed, 31 Jul 2024 17:46:28 +0530 Subject: [PATCH] Add resource for UC schemas to DABs (#1413) ## Changes This PR adds support for UC Schemas to DABs. This allows users to define schemas for tables and other assets their pipelines/workflows create as part of the DAB, thus managing the life-cycle in the DAB. The first version has a couple of intentional limitations: 1. The owner of the schema will be the deployment user. Changing the owner of the schema is not allowed (yet). `run_as` will not be restricted for DABs containing UC schemas. Let's limit the scope of run_as to the compute identity used instead of ownership of data assets like UC schemas. 2. API fields that are present in the update API but not the create API. For example: enabling predictive optimization is not supported in the create schema API and thus is not available in DABs at the moment. ## Tests Manually and integration test. Manually verified the following work: 1. Development mode adds a "dev_" prefix. 2. Modified status is correctly computed in the `bundle summary` command. 3. Grants work as expected, for assigning privileges. 4. Variable interpolation works for the schema ID. --- bundle/config/mutator/process_target_mode.go | 7 + .../mutator/process_target_mode_test.go | 6 + bundle/config/mutator/run_as_test.go | 2 + bundle/config/resources.go | 1 + bundle/config/resources/schema.go | 27 ++++ bundle/deploy/terraform/convert.go | 21 ++- bundle/deploy/terraform/convert_test.go | 57 ++++++++ bundle/deploy/terraform/interpolate.go | 2 + bundle/deploy/terraform/interpolate_test.go | 2 + .../deploy/terraform/tfdyn/convert_schema.go | 53 ++++++++ .../terraform/tfdyn/convert_schema_test.go | 75 +++++++++++ bundle/phases/deploy.go | 98 ++++++++++++-- cmd/bundle/deploy.go | 5 +- internal/acc/workspace.go | 28 ++++ .../uc_schema/databricks_template_schema.json | 8 ++ .../uc_schema/template/databricks.yml.tmpl | 19 +++ .../bundle/bundles/uc_schema/template/nb.sql | 2 + .../uc_schema/template/schema.yml.tmpl | 13 ++ internal/bundle/deploy_test.go | 125 ++++++++++++++++++ internal/bundle/helpers.go | 2 +- 20 files changed, 540 insertions(+), 13 deletions(-) create mode 100644 bundle/config/resources/schema.go create mode 100644 bundle/deploy/terraform/tfdyn/convert_schema.go create mode 100644 bundle/deploy/terraform/tfdyn/convert_schema_test.go create mode 100644 internal/bundle/bundles/uc_schema/databricks_template_schema.json create mode 100644 internal/bundle/bundles/uc_schema/template/databricks.yml.tmpl create mode 100644 internal/bundle/bundles/uc_schema/template/nb.sql create mode 100644 internal/bundle/bundles/uc_schema/template/schema.yml.tmpl create mode 100644 internal/bundle/deploy_test.go diff --git a/bundle/config/mutator/process_target_mode.go b/bundle/config/mutator/process_target_mode.go index b50716fd..9db97907 100644 --- a/bundle/config/mutator/process_target_mode.go +++ b/bundle/config/mutator/process_target_mode.go @@ -112,6 +112,13 @@ func transformDevelopmentMode(ctx context.Context, b *bundle.Bundle) diag.Diagno } } + for i := range r.Schemas { + prefix = "dev_" + b.Config.Workspace.CurrentUser.ShortName + "_" + r.Schemas[i].Name = prefix + r.Schemas[i].Name + // HTTP API for schemas doesn't yet support tags. It's only supported in + // the Databricks UI and via the SQL API. + } + return nil } diff --git a/bundle/config/mutator/process_target_mode_test.go b/bundle/config/mutator/process_target_mode_test.go index 03da64e7..f0c8aee9 100644 --- a/bundle/config/mutator/process_target_mode_test.go +++ b/bundle/config/mutator/process_target_mode_test.go @@ -114,6 +114,9 @@ func mockBundle(mode config.Mode) *bundle.Bundle { }, }, }, + Schemas: map[string]*resources.Schema{ + "schema1": {CreateSchema: &catalog.CreateSchema{Name: "schema1"}}, + }, }, }, // Use AWS implementation for testing. @@ -167,6 +170,9 @@ func TestProcessTargetModeDevelopment(t *testing.T) { assert.Equal(t, "qualityMonitor1", b.Config.Resources.QualityMonitors["qualityMonitor1"].TableName) assert.Nil(t, b.Config.Resources.QualityMonitors["qualityMonitor2"].Schedule) assert.Equal(t, catalog.MonitorCronSchedulePauseStatusUnpaused, b.Config.Resources.QualityMonitors["qualityMonitor3"].Schedule.PauseStatus) + + // Schema 1 + assert.Equal(t, "dev_lennart_schema1", b.Config.Resources.Schemas["schema1"].Name) } func TestProcessTargetModeDevelopmentTagNormalizationForAws(t *testing.T) { diff --git a/bundle/config/mutator/run_as_test.go b/bundle/config/mutator/run_as_test.go index 67bf7bcc..e6cef9ba 100644 --- a/bundle/config/mutator/run_as_test.go +++ b/bundle/config/mutator/run_as_test.go @@ -39,6 +39,7 @@ func allResourceTypes(t *testing.T) []string { "pipelines", "quality_monitors", "registered_models", + "schemas", }, resourceTypes, ) @@ -136,6 +137,7 @@ func TestRunAsErrorForUnsupportedResources(t *testing.T) { "models", "registered_models", "experiments", + "schemas", } base := config.Root{ diff --git a/bundle/config/resources.go b/bundle/config/resources.go index 062e38ed..6c7a927f 100644 --- a/bundle/config/resources.go +++ b/bundle/config/resources.go @@ -18,6 +18,7 @@ type Resources struct { ModelServingEndpoints map[string]*resources.ModelServingEndpoint `json:"model_serving_endpoints,omitempty"` RegisteredModels map[string]*resources.RegisteredModel `json:"registered_models,omitempty"` QualityMonitors map[string]*resources.QualityMonitor `json:"quality_monitors,omitempty"` + Schemas map[string]*resources.Schema `json:"schemas,omitempty"` } type resource struct { diff --git a/bundle/config/resources/schema.go b/bundle/config/resources/schema.go new file mode 100644 index 00000000..7ab00495 --- /dev/null +++ b/bundle/config/resources/schema.go @@ -0,0 +1,27 @@ +package resources + +import ( + "github.com/databricks/databricks-sdk-go/marshal" + "github.com/databricks/databricks-sdk-go/service/catalog" +) + +type Schema struct { + // List of grants to apply on this schema. + Grants []Grant `json:"grants,omitempty"` + + // Full name of the schema (catalog_name.schema_name). This value is read from + // the terraform state after deployment succeeds. + ID string `json:"id,omitempty" bundle:"readonly"` + + *catalog.CreateSchema + + ModifiedStatus ModifiedStatus `json:"modified_status,omitempty" bundle:"internal"` +} + +func (s *Schema) UnmarshalJSON(b []byte) error { + return marshal.Unmarshal(b, s) +} + +func (s Schema) MarshalJSON() ([]byte, error) { + return marshal.Marshal(s) +} diff --git a/bundle/deploy/terraform/convert.go b/bundle/deploy/terraform/convert.go index a6ec04d9..f13c241c 100644 --- a/bundle/deploy/terraform/convert.go +++ b/bundle/deploy/terraform/convert.go @@ -66,8 +66,10 @@ func convGrants(acl []resources.Grant) *schema.ResourceGrants { // BundleToTerraform converts resources in a bundle configuration // to the equivalent Terraform JSON representation. // -// NOTE: THIS IS CURRENTLY A HACK. WE NEED A BETTER WAY TO -// CONVERT TO/FROM TERRAFORM COMPATIBLE FORMAT. +// Note: This function is an older implementation of the conversion logic. It is +// no longer used in any code paths. It is kept around to be used in tests. +// New resources do not need to modify this function and can instead can define +// the conversion login in the tfdyn package. func BundleToTerraform(config *config.Root) *schema.Root { tfroot := schema.NewRoot() tfroot.Provider = schema.NewProviders() @@ -382,6 +384,16 @@ func TerraformToBundle(state *resourcesState, config *config.Root) error { } cur.ID = instance.Attributes.ID config.Resources.QualityMonitors[resource.Name] = cur + case "databricks_schema": + if config.Resources.Schemas == nil { + config.Resources.Schemas = make(map[string]*resources.Schema) + } + cur := config.Resources.Schemas[resource.Name] + if cur == nil { + cur = &resources.Schema{ModifiedStatus: resources.ModifiedStatusDeleted} + } + cur.ID = instance.Attributes.ID + config.Resources.Schemas[resource.Name] = cur case "databricks_permissions": case "databricks_grants": // Ignore; no need to pull these back into the configuration. @@ -426,6 +438,11 @@ func TerraformToBundle(state *resourcesState, config *config.Root) error { src.ModifiedStatus = resources.ModifiedStatusCreated } } + for _, src := range config.Resources.Schemas { + if src.ModifiedStatus == "" && src.ID == "" { + src.ModifiedStatus = resources.ModifiedStatusCreated + } + } return nil } diff --git a/bundle/deploy/terraform/convert_test.go b/bundle/deploy/terraform/convert_test.go index 7ea44853..e4ef6114 100644 --- a/bundle/deploy/terraform/convert_test.go +++ b/bundle/deploy/terraform/convert_test.go @@ -655,6 +655,14 @@ func TestTerraformToBundleEmptyLocalResources(t *testing.T) { {Attributes: stateInstanceAttributes{ID: "1"}}, }, }, + { + Type: "databricks_schema", + Mode: "managed", + Name: "test_schema", + Instances: []stateResourceInstance{ + {Attributes: stateInstanceAttributes{ID: "1"}}, + }, + }, }, } err := TerraformToBundle(&tfState, &config) @@ -681,6 +689,9 @@ func TestTerraformToBundleEmptyLocalResources(t *testing.T) { assert.Equal(t, "1", config.Resources.QualityMonitors["test_monitor"].ID) assert.Equal(t, resources.ModifiedStatusDeleted, config.Resources.QualityMonitors["test_monitor"].ModifiedStatus) + assert.Equal(t, "1", config.Resources.Schemas["test_schema"].ID) + assert.Equal(t, resources.ModifiedStatusDeleted, config.Resources.Schemas["test_schema"].ModifiedStatus) + AssertFullResourceCoverage(t, &config) } @@ -736,6 +747,13 @@ func TestTerraformToBundleEmptyRemoteResources(t *testing.T) { }, }, }, + Schemas: map[string]*resources.Schema{ + "test_schema": { + CreateSchema: &catalog.CreateSchema{ + Name: "test_schema", + }, + }, + }, }, } var tfState = resourcesState{ @@ -765,6 +783,9 @@ func TestTerraformToBundleEmptyRemoteResources(t *testing.T) { assert.Equal(t, "", config.Resources.QualityMonitors["test_monitor"].ID) assert.Equal(t, resources.ModifiedStatusCreated, config.Resources.QualityMonitors["test_monitor"].ModifiedStatus) + assert.Equal(t, "", config.Resources.Schemas["test_schema"].ID) + assert.Equal(t, resources.ModifiedStatusCreated, config.Resources.Schemas["test_schema"].ModifiedStatus) + AssertFullResourceCoverage(t, &config) } @@ -855,6 +876,18 @@ func TestTerraformToBundleModifiedResources(t *testing.T) { }, }, }, + Schemas: map[string]*resources.Schema{ + "test_schema": { + CreateSchema: &catalog.CreateSchema{ + Name: "test_schema", + }, + }, + "test_schema_new": { + CreateSchema: &catalog.CreateSchema{ + Name: "test_schema_new", + }, + }, + }, }, } var tfState = resourcesState{ @@ -971,6 +1004,22 @@ func TestTerraformToBundleModifiedResources(t *testing.T) { {Attributes: stateInstanceAttributes{ID: "test_monitor_old"}}, }, }, + { + Type: "databricks_schema", + Mode: "managed", + Name: "test_schema", + Instances: []stateResourceInstance{ + {Attributes: stateInstanceAttributes{ID: "1"}}, + }, + }, + { + Type: "databricks_schema", + Mode: "managed", + Name: "test_schema_old", + Instances: []stateResourceInstance{ + {Attributes: stateInstanceAttributes{ID: "2"}}, + }, + }, }, } err := TerraformToBundle(&tfState, &config) @@ -1024,6 +1073,14 @@ func TestTerraformToBundleModifiedResources(t *testing.T) { assert.Equal(t, resources.ModifiedStatusDeleted, config.Resources.QualityMonitors["test_monitor_old"].ModifiedStatus) assert.Equal(t, "", config.Resources.QualityMonitors["test_monitor_new"].ID) assert.Equal(t, resources.ModifiedStatusCreated, config.Resources.QualityMonitors["test_monitor_new"].ModifiedStatus) + + assert.Equal(t, "1", config.Resources.Schemas["test_schema"].ID) + assert.Equal(t, "", config.Resources.Schemas["test_schema"].ModifiedStatus) + assert.Equal(t, "2", config.Resources.Schemas["test_schema_old"].ID) + assert.Equal(t, resources.ModifiedStatusDeleted, config.Resources.Schemas["test_schema_old"].ModifiedStatus) + assert.Equal(t, "", config.Resources.Schemas["test_schema_new"].ID) + assert.Equal(t, resources.ModifiedStatusCreated, config.Resources.Schemas["test_schema_new"].ModifiedStatus) + AssertFullResourceCoverage(t, &config) } diff --git a/bundle/deploy/terraform/interpolate.go b/bundle/deploy/terraform/interpolate.go index 608f1c79..faa098e1 100644 --- a/bundle/deploy/terraform/interpolate.go +++ b/bundle/deploy/terraform/interpolate.go @@ -56,6 +56,8 @@ func (m *interpolateMutator) Apply(ctx context.Context, b *bundle.Bundle) diag.D path = dyn.NewPath(dyn.Key("databricks_registered_model")).Append(path[2:]...) case dyn.Key("quality_monitors"): path = dyn.NewPath(dyn.Key("databricks_quality_monitor")).Append(path[2:]...) + case dyn.Key("schemas"): + path = dyn.NewPath(dyn.Key("databricks_schema")).Append(path[2:]...) default: // Trigger "key not found" for unknown resource types. return dyn.GetByPath(root, path) diff --git a/bundle/deploy/terraform/interpolate_test.go b/bundle/deploy/terraform/interpolate_test.go index 9af4a144..5ceb243b 100644 --- a/bundle/deploy/terraform/interpolate_test.go +++ b/bundle/deploy/terraform/interpolate_test.go @@ -30,6 +30,7 @@ func TestInterpolate(t *testing.T) { "other_experiment": "${resources.experiments.other_experiment.id}", "other_model_serving": "${resources.model_serving_endpoints.other_model_serving.id}", "other_registered_model": "${resources.registered_models.other_registered_model.id}", + "other_schema": "${resources.schemas.other_schema.id}", }, Tasks: []jobs.Task{ { @@ -65,6 +66,7 @@ func TestInterpolate(t *testing.T) { assert.Equal(t, "${databricks_mlflow_experiment.other_experiment.id}", j.Tags["other_experiment"]) assert.Equal(t, "${databricks_model_serving.other_model_serving.id}", j.Tags["other_model_serving"]) assert.Equal(t, "${databricks_registered_model.other_registered_model.id}", j.Tags["other_registered_model"]) + assert.Equal(t, "${databricks_schema.other_schema.id}", j.Tags["other_schema"]) m := b.Config.Resources.Models["my_model"] assert.Equal(t, "my_model", m.Model.Name) diff --git a/bundle/deploy/terraform/tfdyn/convert_schema.go b/bundle/deploy/terraform/tfdyn/convert_schema.go new file mode 100644 index 00000000..b5e6a88c --- /dev/null +++ b/bundle/deploy/terraform/tfdyn/convert_schema.go @@ -0,0 +1,53 @@ +package tfdyn + +import ( + "context" + "fmt" + + "github.com/databricks/cli/bundle/internal/tf/schema" + "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/dyn/convert" + "github.com/databricks/cli/libs/log" +) + +func convertSchemaResource(ctx context.Context, vin dyn.Value) (dyn.Value, error) { + // Normalize the output value to the target schema. + v, diags := convert.Normalize(schema.ResourceSchema{}, vin) + for _, diag := range diags { + log.Debugf(ctx, "schema normalization diagnostic: %s", diag.Summary) + } + + // We always set force destroy as it allows DABs to manage the lifecycle + // of the schema. It's the responsibility of the CLI to ensure the user + // is adequately warned when they try to delete a UC schema. + vout, err := dyn.SetByPath(v, dyn.MustPathFromString("force_destroy"), dyn.V(true)) + if err != nil { + return dyn.InvalidValue, err + } + + return vout, nil +} + +type schemaConverter struct{} + +func (schemaConverter) Convert(ctx context.Context, key string, vin dyn.Value, out *schema.Resources) error { + vout, err := convertSchemaResource(ctx, vin) + if err != nil { + return err + } + + // Add the converted resource to the output. + out.Schema[key] = vout.AsAny() + + // Configure grants for this resource. + if grants := convertGrantsResource(ctx, vin); grants != nil { + grants.Schema = fmt.Sprintf("${databricks_schema.%s.id}", key) + out.Grants["schema_"+key] = grants + } + + return nil +} + +func init() { + registerConverter("schemas", schemaConverter{}) +} diff --git a/bundle/deploy/terraform/tfdyn/convert_schema_test.go b/bundle/deploy/terraform/tfdyn/convert_schema_test.go new file mode 100644 index 00000000..2efbf3e4 --- /dev/null +++ b/bundle/deploy/terraform/tfdyn/convert_schema_test.go @@ -0,0 +1,75 @@ +package tfdyn + +import ( + "context" + "testing" + + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/bundle/internal/tf/schema" + "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/dyn/convert" + "github.com/databricks/databricks-sdk-go/service/catalog" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestConvertSchema(t *testing.T) { + var src = resources.Schema{ + CreateSchema: &catalog.CreateSchema{ + Name: "name", + CatalogName: "catalog", + Comment: "comment", + Properties: map[string]string{ + "k1": "v1", + "k2": "v2", + }, + StorageRoot: "root", + }, + Grants: []resources.Grant{ + { + Privileges: []string{"EXECUTE"}, + Principal: "jack@gmail.com", + }, + { + Privileges: []string{"RUN"}, + Principal: "jane@gmail.com", + }, + }, + } + + vin, err := convert.FromTyped(src, dyn.NilValue) + require.NoError(t, err) + + ctx := context.Background() + out := schema.NewResources() + err = schemaConverter{}.Convert(ctx, "my_schema", vin, out) + require.NoError(t, err) + + // Assert equality on the schema + assert.Equal(t, map[string]any{ + "name": "name", + "catalog_name": "catalog", + "comment": "comment", + "properties": map[string]any{ + "k1": "v1", + "k2": "v2", + }, + "force_destroy": true, + "storage_root": "root", + }, out.Schema["my_schema"]) + + // Assert equality on the grants + assert.Equal(t, &schema.ResourceGrants{ + Schema: "${databricks_schema.my_schema.id}", + Grant: []schema.ResourceGrantsGrant{ + { + Privileges: []string{"EXECUTE"}, + Principal: "jack@gmail.com", + }, + { + Privileges: []string{"RUN"}, + Principal: "jane@gmail.com", + }, + }, + }, out.Grants["schema_my_schema"]) +} diff --git a/bundle/phases/deploy.go b/bundle/phases/deploy.go index 46c38918..c68153f2 100644 --- a/bundle/phases/deploy.go +++ b/bundle/phases/deploy.go @@ -1,6 +1,9 @@ package phases import ( + "context" + "fmt" + "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/artifacts" "github.com/databricks/cli/bundle/config" @@ -14,10 +17,91 @@ import ( "github.com/databricks/cli/bundle/permissions" "github.com/databricks/cli/bundle/python" "github.com/databricks/cli/bundle/scripts" + "github.com/databricks/cli/libs/cmdio" + terraformlib "github.com/databricks/cli/libs/terraform" ) +func approvalForUcSchemaDelete(ctx context.Context, b *bundle.Bundle) (bool, error) { + tf := b.Terraform + if tf == nil { + return false, fmt.Errorf("terraform not initialized") + } + + // read plan file + plan, err := tf.ShowPlanFile(ctx, b.Plan.Path) + if err != nil { + return false, err + } + + actions := make([]terraformlib.Action, 0) + for _, rc := range plan.ResourceChanges { + // We only care about destructive actions on UC schema resources. + if rc.Type != "databricks_schema" { + continue + } + + var actionType terraformlib.ActionType + + switch { + case rc.Change.Actions.Delete(): + actionType = terraformlib.ActionTypeDelete + case rc.Change.Actions.Replace(): + actionType = terraformlib.ActionTypeRecreate + default: + // We don't need a prompt for non-destructive actions like creating + // or updating a schema. + continue + } + + actions = append(actions, terraformlib.Action{ + Action: actionType, + ResourceType: rc.Type, + ResourceName: rc.Name, + }) + } + + // No restricted actions planned. No need for approval. + if len(actions) == 0 { + return true, nil + } + + cmdio.LogString(ctx, "The following UC schemas will be deleted or recreated. Any underlying data may be lost:") + for _, action := range actions { + cmdio.Log(ctx, action) + } + + if b.AutoApprove { + return true, nil + } + + if !cmdio.IsPromptSupported(ctx) { + return false, fmt.Errorf("the deployment requires destructive actions, but current console does not support prompting. Please specify --auto-approve if you would like to skip prompts and proceed") + } + + cmdio.LogString(ctx, "") + approved, err := cmdio.AskYesOrNo(ctx, "Would you like to proceed?") + if err != nil { + return false, err + } + + return approved, nil +} + // The deploy phase deploys artifacts and resources. func Deploy() bundle.Mutator { + // Core mutators that CRUD resources and modify deployment state. These + // mutators need informed consent if they are potentially destructive. + deployCore := bundle.Defer( + terraform.Apply(), + bundle.Seq( + terraform.StatePush(), + terraform.Load(), + metadata.Compute(), + metadata.Upload(), + bundle.LogString("Deployment complete!"), + ), + ) + deployMutator := bundle.Seq( scripts.Execute(config.ScriptPreDeploy), lock.Acquire(), @@ -37,20 +121,16 @@ func Deploy() bundle.Mutator { terraform.Interpolate(), terraform.Write(), terraform.CheckRunningResource(), - bundle.Defer( - terraform.Apply(), - bundle.Seq( - terraform.StatePush(), - terraform.Load(), - metadata.Compute(), - metadata.Upload(), - ), + terraform.Plan(terraform.PlanGoal("deploy")), + bundle.If( + approvalForUcSchemaDelete, + deployCore, + bundle.LogString("Deployment cancelled!"), ), ), lock.Release(lock.GoalDeploy), ), scripts.Execute(config.ScriptPostDeploy), - bundle.LogString("Deployment complete!"), ) return newPhase( diff --git a/cmd/bundle/deploy.go b/cmd/bundle/deploy.go index 1232c8de..1166875a 100644 --- a/cmd/bundle/deploy.go +++ b/cmd/bundle/deploy.go @@ -24,10 +24,12 @@ func newDeployCommand() *cobra.Command { var forceLock bool var failOnActiveRuns bool var computeID string + var autoApprove bool cmd.Flags().BoolVar(&force, "force", false, "Force-override Git branch validation.") cmd.Flags().BoolVar(&forceLock, "force-lock", false, "Force acquisition of deployment lock.") cmd.Flags().BoolVar(&failOnActiveRuns, "fail-on-active-runs", false, "Fail if there are running jobs or pipelines in the deployment.") cmd.Flags().StringVarP(&computeID, "compute-id", "c", "", "Override compute in the deployment with the given compute ID.") + cmd.Flags().BoolVar(&autoApprove, "auto-approve", false, "Skip interactive approvals that might be required for deployment.") cmd.RunE = func(cmd *cobra.Command, args []string) error { ctx := cmd.Context() @@ -37,10 +39,11 @@ func newDeployCommand() *cobra.Command { bundle.ApplyFunc(ctx, b, func(context.Context, *bundle.Bundle) diag.Diagnostics { b.Config.Bundle.Force = force b.Config.Bundle.Deployment.Lock.Force = forceLock + b.AutoApprove = autoApprove + if cmd.Flag("compute-id").Changed { b.Config.Bundle.ComputeID = computeID } - if cmd.Flag("fail-on-active-runs").Changed { b.Config.Bundle.Deployment.FailOnActiveRuns = failOnActiveRuns } diff --git a/internal/acc/workspace.go b/internal/acc/workspace.go index 8944e199..39374f22 100644 --- a/internal/acc/workspace.go +++ b/internal/acc/workspace.go @@ -2,6 +2,7 @@ package acc import ( "context" + "os" "testing" "github.com/databricks/databricks-sdk-go" @@ -38,6 +39,33 @@ func WorkspaceTest(t *testing.T) (context.Context, *WorkspaceT) { return wt.ctx, wt } +// Run the workspace test only on UC workspaces. +func UcWorkspaceTest(t *testing.T) (context.Context, *WorkspaceT) { + loadDebugEnvIfRunFromIDE(t, "workspace") + + t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) + + if os.Getenv("TEST_METASTORE_ID") == "" { + t.Skipf("Skipping on non-UC workspaces") + } + if os.Getenv("DATABRICKS_ACCOUNT_ID") != "" { + t.Skipf("Skipping on accounts") + } + + w, err := databricks.NewWorkspaceClient() + require.NoError(t, err) + + wt := &WorkspaceT{ + T: t, + + W: w, + + ctx: context.Background(), + } + + return wt.ctx, wt +} + func (t *WorkspaceT) TestClusterID() string { clusterID := GetEnvOrSkipTest(t.T, "TEST_BRICKS_CLUSTER_ID") err := t.W.Clusters.EnsureClusterIsRunning(t.ctx, clusterID) diff --git a/internal/bundle/bundles/uc_schema/databricks_template_schema.json b/internal/bundle/bundles/uc_schema/databricks_template_schema.json new file mode 100644 index 00000000..762f4470 --- /dev/null +++ b/internal/bundle/bundles/uc_schema/databricks_template_schema.json @@ -0,0 +1,8 @@ +{ + "properties": { + "unique_id": { + "type": "string", + "description": "Unique ID for the schema and pipeline names" + } + } +} diff --git a/internal/bundle/bundles/uc_schema/template/databricks.yml.tmpl b/internal/bundle/bundles/uc_schema/template/databricks.yml.tmpl new file mode 100644 index 00000000..961af25e --- /dev/null +++ b/internal/bundle/bundles/uc_schema/template/databricks.yml.tmpl @@ -0,0 +1,19 @@ +bundle: + name: "bundle-playground" + +resources: + pipelines: + foo: + name: test-pipeline-{{.unique_id}} + libraries: + - notebook: + path: ./nb.sql + development: true + catalog: main + +include: + - "*.yml" + +targets: + development: + default: true diff --git a/internal/bundle/bundles/uc_schema/template/nb.sql b/internal/bundle/bundles/uc_schema/template/nb.sql new file mode 100644 index 00000000..199ff507 --- /dev/null +++ b/internal/bundle/bundles/uc_schema/template/nb.sql @@ -0,0 +1,2 @@ +-- Databricks notebook source +select 1 diff --git a/internal/bundle/bundles/uc_schema/template/schema.yml.tmpl b/internal/bundle/bundles/uc_schema/template/schema.yml.tmpl new file mode 100644 index 00000000..50067036 --- /dev/null +++ b/internal/bundle/bundles/uc_schema/template/schema.yml.tmpl @@ -0,0 +1,13 @@ +resources: + schemas: + bar: + name: test-schema-{{.unique_id}} + catalog_name: main + comment: This schema was created from DABs + +targets: + development: + resources: + pipelines: + foo: + target: ${resources.schemas.bar.id} diff --git a/internal/bundle/deploy_test.go b/internal/bundle/deploy_test.go new file mode 100644 index 00000000..3da88570 --- /dev/null +++ b/internal/bundle/deploy_test.go @@ -0,0 +1,125 @@ +package bundle + +import ( + "context" + "errors" + "fmt" + "io" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/internal" + "github.com/databricks/cli/internal/acc" + "github.com/databricks/databricks-sdk-go" + "github.com/databricks/databricks-sdk-go/apierr" + "github.com/databricks/databricks-sdk-go/service/catalog" + "github.com/databricks/databricks-sdk-go/service/files" + "github.com/google/uuid" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func setupUcSchemaBundle(t *testing.T, ctx context.Context, w *databricks.WorkspaceClient, uniqueId string) string { + bundleRoot, err := initTestTemplate(t, ctx, "uc_schema", map[string]any{ + "unique_id": uniqueId, + }) + require.NoError(t, err) + + err = deployBundle(t, ctx, bundleRoot) + require.NoError(t, err) + + t.Cleanup(func() { + destroyBundle(t, ctx, bundleRoot) + }) + + // Assert the schema is created + catalogName := "main" + schemaName := "test-schema-" + uniqueId + schema, err := w.Schemas.GetByFullName(ctx, strings.Join([]string{catalogName, schemaName}, ".")) + require.NoError(t, err) + require.Equal(t, strings.Join([]string{catalogName, schemaName}, "."), schema.FullName) + require.Equal(t, "This schema was created from DABs", schema.Comment) + + // Assert the pipeline is created + pipelineName := "test-pipeline-" + uniqueId + pipeline, err := w.Pipelines.GetByName(ctx, pipelineName) + require.NoError(t, err) + require.Equal(t, pipelineName, pipeline.Name) + id := pipeline.PipelineId + + // Assert the pipeline uses the schema + i, err := w.Pipelines.GetByPipelineId(ctx, id) + require.NoError(t, err) + require.Equal(t, catalogName, i.Spec.Catalog) + require.Equal(t, strings.Join([]string{catalogName, schemaName}, "."), i.Spec.Target) + + // Create a volume in the schema, and add a file to it. This ensures that the + // schema has some data in it and deletion will fail unless the generated + // terraform configuration has force_destroy set to true. + volumeName := "test-volume-" + uniqueId + volume, err := w.Volumes.Create(ctx, catalog.CreateVolumeRequestContent{ + CatalogName: catalogName, + SchemaName: schemaName, + Name: volumeName, + VolumeType: catalog.VolumeTypeManaged, + }) + require.NoError(t, err) + require.Equal(t, volume.Name, volumeName) + + fileName := "test-file-" + uniqueId + err = w.Files.Upload(ctx, files.UploadRequest{ + Contents: io.NopCloser(strings.NewReader("Hello, world!")), + FilePath: fmt.Sprintf("/Volumes/%s/%s/%s/%s", catalogName, schemaName, volumeName, fileName), + }) + require.NoError(t, err) + + return bundleRoot +} + +func TestAccBundleDeployUcSchema(t *testing.T) { + ctx, wt := acc.UcWorkspaceTest(t) + w := wt.W + + uniqueId := uuid.New().String() + schemaName := "test-schema-" + uniqueId + catalogName := "main" + + bundleRoot := setupUcSchemaBundle(t, ctx, w, uniqueId) + + // Remove the UC schema from the resource configuration. + err := os.Remove(filepath.Join(bundleRoot, "schema.yml")) + require.NoError(t, err) + + // Redeploy the bundle + err = deployBundle(t, ctx, bundleRoot) + require.NoError(t, err) + + // Assert the schema is deleted + _, err = w.Schemas.GetByFullName(ctx, strings.Join([]string{catalogName, schemaName}, ".")) + apiErr := &apierr.APIError{} + assert.True(t, errors.As(err, &apiErr)) + assert.Equal(t, "SCHEMA_DOES_NOT_EXIST", apiErr.ErrorCode) +} + +func TestAccBundleDeployUcSchemaFailsWithoutAutoApprove(t *testing.T) { + ctx, wt := acc.UcWorkspaceTest(t) + w := wt.W + + uniqueId := uuid.New().String() + bundleRoot := setupUcSchemaBundle(t, ctx, w, uniqueId) + + // Remove the UC schema from the resource configuration. + err := os.Remove(filepath.Join(bundleRoot, "schema.yml")) + require.NoError(t, err) + + // Redeploy the bundle + t.Setenv("BUNDLE_ROOT", bundleRoot) + t.Setenv("TERM", "dumb") + c := internal.NewCobraTestRunnerWithContext(t, ctx, "bundle", "deploy", "--force-lock") + stdout, _, err := c.Run() + assert.EqualError(t, err, root.ErrAlreadyPrinted.Error()) + assert.Contains(t, stdout.String(), "the deployment requires destructive actions, but current console does not support prompting. Please specify --auto-approve if you would like to skip prompts and proceed") +} diff --git a/internal/bundle/helpers.go b/internal/bundle/helpers.go index c33c1533..1910a014 100644 --- a/internal/bundle/helpers.go +++ b/internal/bundle/helpers.go @@ -64,7 +64,7 @@ func validateBundle(t *testing.T, ctx context.Context, path string) ([]byte, err func deployBundle(t *testing.T, ctx context.Context, path string) error { t.Setenv("BUNDLE_ROOT", path) - c := internal.NewCobraTestRunnerWithContext(t, ctx, "bundle", "deploy", "--force-lock") + c := internal.NewCobraTestRunnerWithContext(t, ctx, "bundle", "deploy", "--force-lock", "--auto-approve") _, _, err := c.Run() return err }