databricks-cli/bundle/deploy/terraform/convert.go

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

286 lines
9.6 KiB
Go
Raw Normal View History

package terraform
import (
"context"
2022-12-15 14:12:47 +00:00
"fmt"
"github.com/databricks/cli/bundle/config"
"github.com/databricks/cli/bundle/config/resources"
"github.com/databricks/cli/bundle/deploy/terraform/tfdyn"
"github.com/databricks/cli/bundle/internal/tf/schema"
"github.com/databricks/cli/libs/dyn"
Added support for Databricks Apps in DABs (#1928) ## Changes Now it's possible to configure new `app` resource in bundle and point it to the custom `source_code_path` location where Databricks App code is defined. On `databricks bundle deploy` DABs will create an app. All consecutive `databricks bundle deploy` execution will update an existing app if there are any updated On `databricks bundle run <my_app>` DABs will execute app deployment. If the app is not started yet, it will start the app first. ### Bundle configuration ``` bundle: name: apps variables: my_job_id: description: "ID of job to run app" lookup: job: "My Job" databricks_name: description: "Name for app user" additional_flags: description: "Additional flags to run command app" default: "" my_app_config: type: complex description: "Configuration for my Databricks App" default: command: - flask - --app - hello - run - ${var.additional_flags} env: - name: DATABRICKS_NAME value: ${var.databricks_name} resources: apps: my_app: name: "anester-app" # required and has to be unique description: "My App" source_code_path: ./app # required and points to location of app code config: ${var.my_app_config} resources: - name: "my-job" description: "A job for app to be able to run" job: id: ${var.my_job_id} permission: "CAN_MANAGE_RUN" permissions: - user_name: "foo@bar.com" level: "CAN_VIEW" - service_principal_name: "my_sp" level: "CAN_MANAGE" targets: dev: variables: databricks_name: "Andrew (from dev)" additional_flags: --debug prod: variables: databricks_name: "Andrew (from prod)" ``` ### Execution 1. `databricks bundle deploy -t dev` 2. `databricks bundle run my_app -t dev` **If app is started** ``` ✓ Getting the status of the app my-app ✓ App is in RUNNING state ✓ Preparing source code for new app deployment. ✓ Deployment is pending ✓ Starting app with command: flask --app hello run --debug ✓ App started successfully You can access the app at <app-url> ``` **If app is not started** ``` ✓ Getting the status of the app my-app ✓ App is in UNAVAILABLE state ✓ Starting the app my-app ✓ App is starting... .... ✓ App is starting... ✓ App is started! ✓ Preparing source code for new app deployment. ✓ Downloading source code from /Workspace/Users/... ✓ Starting app with command: flask --app hello run --debug ✓ App started successfully You can access the app at <app-url> ``` ## Tests Added unit and config tests + manual test. ``` --- PASS: TestAccDeployBundleWithApp (404.59s) PASS coverage: 36.8% of statements in ./... ok github.com/databricks/cli/internal/bundle 405.035s coverage: 36.8% of statements in ./... ```
2025-01-13 16:43:48 +00:00
"github.com/databricks/databricks-sdk-go/service/apps"
2022-12-15 14:12:47 +00:00
tfjson "github.com/hashicorp/terraform-json"
)
// BundleToTerraformWithDynValue converts resources in a bundle configuration
// to the equivalent Terraform JSON representation.
func BundleToTerraformWithDynValue(ctx context.Context, root dyn.Value) (*schema.Root, error) {
tfroot := schema.NewRoot()
tfroot.Provider = schema.NewProviders()
// Convert each resource in the bundle to the equivalent Terraform representation.
dynResources, err := dyn.Get(root, "resources")
if err != nil {
// If the resources key is missing, return an empty root.
if dyn.IsNoSuchKeyError(err) {
return tfroot, nil
}
return nil, err
}
tfroot.Resource = schema.NewResources()
numResources := 0
_, err = dyn.Walk(dynResources, func(p dyn.Path, v dyn.Value) (dyn.Value, error) {
if len(p) < 2 {
return v, nil
}
// Skip resources that have been deleted locally.
modifiedStatus, err := dyn.Get(v, "modified_status")
if err == nil {
modifiedStatusStr, ok := modifiedStatus.AsString()
if ok && modifiedStatusStr == resources.ModifiedStatusDeleted {
return v, dyn.ErrSkip
}
}
typ := p[0].Key()
key := p[1].Key()
// Lookup the converter based on the resource type.
c, ok := tfdyn.GetConverter(typ)
if !ok {
return dyn.InvalidValue, fmt.Errorf("no converter for resource type %s", typ)
}
// Convert resource to Terraform representation.
err = c.Convert(ctx, key, v, tfroot.Resource)
if err != nil {
return dyn.InvalidValue, err
}
numResources++
// Skip traversal of the resource itself.
return v, dyn.ErrSkip
})
if err != nil {
return nil, err
}
// We explicitly set "resource" to nil to omit it from a JSON encoding.
// This is required because the terraform CLI requires >= 1 resources defined
// if the "resource" property is used in a .tf.json file.
if numResources == 0 {
tfroot.Resource = nil
}
return tfroot, nil
}
func TerraformToBundle(state *resourcesState, config *config.Root) error {
for _, resource := range state.Resources {
if resource.Mode != tfjson.ManagedResourceMode {
continue
}
for _, instance := range resource.Instances {
switch resource.Type {
case "databricks_job":
if config.Resources.Jobs == nil {
config.Resources.Jobs = make(map[string]*resources.Job)
}
cur := config.Resources.Jobs[resource.Name]
if cur == nil {
cur = &resources.Job{ModifiedStatus: resources.ModifiedStatusDeleted}
}
cur.ID = instance.Attributes.ID
config.Resources.Jobs[resource.Name] = cur
case "databricks_pipeline":
if config.Resources.Pipelines == nil {
config.Resources.Pipelines = make(map[string]*resources.Pipeline)
}
cur := config.Resources.Pipelines[resource.Name]
if cur == nil {
cur = &resources.Pipeline{ModifiedStatus: resources.ModifiedStatusDeleted}
}
cur.ID = instance.Attributes.ID
config.Resources.Pipelines[resource.Name] = cur
case "databricks_mlflow_model":
if config.Resources.Models == nil {
config.Resources.Models = make(map[string]*resources.MlflowModel)
}
cur := config.Resources.Models[resource.Name]
if cur == nil {
cur = &resources.MlflowModel{ModifiedStatus: resources.ModifiedStatusDeleted}
}
cur.ID = instance.Attributes.ID
config.Resources.Models[resource.Name] = cur
case "databricks_mlflow_experiment":
if config.Resources.Experiments == nil {
config.Resources.Experiments = make(map[string]*resources.MlflowExperiment)
}
cur := config.Resources.Experiments[resource.Name]
if cur == nil {
cur = &resources.MlflowExperiment{ModifiedStatus: resources.ModifiedStatusDeleted}
}
cur.ID = instance.Attributes.ID
config.Resources.Experiments[resource.Name] = cur
case "databricks_model_serving":
if config.Resources.ModelServingEndpoints == nil {
config.Resources.ModelServingEndpoints = make(map[string]*resources.ModelServingEndpoint)
}
cur := config.Resources.ModelServingEndpoints[resource.Name]
if cur == nil {
cur = &resources.ModelServingEndpoint{ModifiedStatus: resources.ModifiedStatusDeleted}
}
cur.ID = instance.Attributes.ID
config.Resources.ModelServingEndpoints[resource.Name] = cur
case "databricks_registered_model":
if config.Resources.RegisteredModels == nil {
config.Resources.RegisteredModels = make(map[string]*resources.RegisteredModel)
}
cur := config.Resources.RegisteredModels[resource.Name]
if cur == nil {
cur = &resources.RegisteredModel{ModifiedStatus: resources.ModifiedStatusDeleted}
}
cur.ID = instance.Attributes.ID
config.Resources.RegisteredModels[resource.Name] = cur
case "databricks_quality_monitor":
if config.Resources.QualityMonitors == nil {
config.Resources.QualityMonitors = make(map[string]*resources.QualityMonitor)
}
cur := config.Resources.QualityMonitors[resource.Name]
if cur == nil {
cur = &resources.QualityMonitor{ModifiedStatus: resources.ModifiedStatusDeleted}
}
cur.ID = instance.Attributes.ID
config.Resources.QualityMonitors[resource.Name] = cur
case "databricks_schema":
if config.Resources.Schemas == nil {
config.Resources.Schemas = make(map[string]*resources.Schema)
}
cur := config.Resources.Schemas[resource.Name]
if cur == nil {
cur = &resources.Schema{ModifiedStatus: resources.ModifiedStatusDeleted}
}
cur.ID = instance.Attributes.ID
config.Resources.Schemas[resource.Name] = cur
Add DABs support for Unity Catalog volumes (#1762) ## Changes This PR adds support for UC volumes to DABs. ### Can I use a UC volume managed by DABs in `artifact_path`? Yes, but we require the volume to exist before being referenced in `artifact_path`. Otherwise you'll see an error that the volume does not exist. For this case, this PR also adds a warning if we detect that the UC volume is defined in the DAB itself, which informs the user to deploy the UC volume in a separate deployment first before using it in `artifact_path`. We cannot create the UC volume and then upload the artifacts to it in the same `bundle deploy` because `bundle deploy` always uploads the artifacts to `artifact_path` before materializing any resources defined in the bundle. Supporting this in a single deployment requires us to migrate away from our dependency on the Databricks Terraform provider to manage the CRUD lifecycle of DABs resources. ### Why do we not support `preset.name_prefix` for UC volumes? UC volumes will not have a `dev_shreyas_goenka` prefix added in `mode: development`. Configuring `presets.name_prefix` will be a no-op for UC volumes. We have decided not to support prefixing for UC resources. This is because: 1. UC provides its own namespace hierarchy that is independent of DABs. 2. Users can always manually use `${workspace.current_user.short_name}` to configure the prefixes manually. Customers often manually set up a UC hierarchy for dev and prod, including a schema or catalog per developer. Thus, it's often unnecessary for us to add prefixing in `mode: development` by default for UC resources. In retrospect, supporting prefixing for UC schemas and registered models was a mistake and will be removed in a future release of DABs. ## Tests Unit, integration test, and manually. ### Manual Testing cases: 1. UC volume does not exist: ``` ➜ bundle-playground git:(master) ✗ cli bundle deploy Error: failed to fetch metadata for the UC volume /Volumes/main/caps/my_volume that is configured in the artifact_path: Not Found ``` 2. UC Volume does not exist, but is defined in the DAB ``` ➜ bundle-playground git:(master) ✗ cli bundle deploy Error: failed to fetch metadata for the UC volume /Volumes/main/caps/managed_by_dab that is configured in the artifact_path: Not Found Warning: You might be using a UC volume in your artifact_path that is managed by this bundle but which has not been deployed yet. Please deploy the UC volume in a separate bundle deploy before using it in the artifact_path. at resources.volumes.bar in databricks.yml:24:7 ``` --------- Co-authored-by: Pieter Noordhuis <pieter.noordhuis@databricks.com>
2024-12-02 21:18:07 +00:00
case "databricks_volume":
if config.Resources.Volumes == nil {
config.Resources.Volumes = make(map[string]*resources.Volume)
}
cur := config.Resources.Volumes[resource.Name]
if cur == nil {
cur = &resources.Volume{ModifiedStatus: resources.ModifiedStatusDeleted}
}
cur.ID = instance.Attributes.ID
config.Resources.Volumes[resource.Name] = cur
case "databricks_cluster":
if config.Resources.Clusters == nil {
config.Resources.Clusters = make(map[string]*resources.Cluster)
}
cur := config.Resources.Clusters[resource.Name]
if cur == nil {
cur = &resources.Cluster{ModifiedStatus: resources.ModifiedStatusDeleted}
}
cur.ID = instance.Attributes.ID
config.Resources.Clusters[resource.Name] = cur
case "databricks_dashboard":
if config.Resources.Dashboards == nil {
config.Resources.Dashboards = make(map[string]*resources.Dashboard)
}
cur := config.Resources.Dashboards[resource.Name]
if cur == nil {
cur = &resources.Dashboard{ModifiedStatus: resources.ModifiedStatusDeleted}
}
cur.ID = instance.Attributes.ID
config.Resources.Dashboards[resource.Name] = cur
Added support for Databricks Apps in DABs (#1928) ## Changes Now it's possible to configure new `app` resource in bundle and point it to the custom `source_code_path` location where Databricks App code is defined. On `databricks bundle deploy` DABs will create an app. All consecutive `databricks bundle deploy` execution will update an existing app if there are any updated On `databricks bundle run <my_app>` DABs will execute app deployment. If the app is not started yet, it will start the app first. ### Bundle configuration ``` bundle: name: apps variables: my_job_id: description: "ID of job to run app" lookup: job: "My Job" databricks_name: description: "Name for app user" additional_flags: description: "Additional flags to run command app" default: "" my_app_config: type: complex description: "Configuration for my Databricks App" default: command: - flask - --app - hello - run - ${var.additional_flags} env: - name: DATABRICKS_NAME value: ${var.databricks_name} resources: apps: my_app: name: "anester-app" # required and has to be unique description: "My App" source_code_path: ./app # required and points to location of app code config: ${var.my_app_config} resources: - name: "my-job" description: "A job for app to be able to run" job: id: ${var.my_job_id} permission: "CAN_MANAGE_RUN" permissions: - user_name: "foo@bar.com" level: "CAN_VIEW" - service_principal_name: "my_sp" level: "CAN_MANAGE" targets: dev: variables: databricks_name: "Andrew (from dev)" additional_flags: --debug prod: variables: databricks_name: "Andrew (from prod)" ``` ### Execution 1. `databricks bundle deploy -t dev` 2. `databricks bundle run my_app -t dev` **If app is started** ``` ✓ Getting the status of the app my-app ✓ App is in RUNNING state ✓ Preparing source code for new app deployment. ✓ Deployment is pending ✓ Starting app with command: flask --app hello run --debug ✓ App started successfully You can access the app at <app-url> ``` **If app is not started** ``` ✓ Getting the status of the app my-app ✓ App is in UNAVAILABLE state ✓ Starting the app my-app ✓ App is starting... .... ✓ App is starting... ✓ App is started! ✓ Preparing source code for new app deployment. ✓ Downloading source code from /Workspace/Users/... ✓ Starting app with command: flask --app hello run --debug ✓ App started successfully You can access the app at <app-url> ``` ## Tests Added unit and config tests + manual test. ``` --- PASS: TestAccDeployBundleWithApp (404.59s) PASS coverage: 36.8% of statements in ./... ok github.com/databricks/cli/internal/bundle 405.035s coverage: 36.8% of statements in ./... ```
2025-01-13 16:43:48 +00:00
case "databricks_app":
if config.Resources.Apps == nil {
config.Resources.Apps = make(map[string]*resources.App)
}
cur := config.Resources.Apps[resource.Name]
if cur == nil {
cur = &resources.App{ModifiedStatus: resources.ModifiedStatusDeleted, App: &apps.App{}}
} else {
// If the app exists in terraform and bundle, we always set modified status to updated
// because we don't really know if the app source code was updated or not.
cur.ModifiedStatus = resources.ModifiedStatusUpdated
}
cur.Name = instance.Attributes.Name
config.Resources.Apps[resource.Name] = cur
case "databricks_permissions":
case "databricks_grants":
// Ignore; no need to pull these back into the configuration.
default:
return fmt.Errorf("missing mapping for %s", resource.Type)
}
2022-12-15 14:12:47 +00:00
}
}
2022-12-15 14:12:47 +00:00
for _, src := range config.Resources.Jobs {
if src.ModifiedStatus == "" && src.ID == "" {
src.ModifiedStatus = resources.ModifiedStatusCreated
}
}
for _, src := range config.Resources.Pipelines {
if src.ModifiedStatus == "" && src.ID == "" {
src.ModifiedStatus = resources.ModifiedStatusCreated
}
}
for _, src := range config.Resources.Models {
if src.ModifiedStatus == "" && src.ID == "" {
src.ModifiedStatus = resources.ModifiedStatusCreated
}
}
for _, src := range config.Resources.Experiments {
if src.ModifiedStatus == "" && src.ID == "" {
src.ModifiedStatus = resources.ModifiedStatusCreated
}
}
for _, src := range config.Resources.ModelServingEndpoints {
if src.ModifiedStatus == "" && src.ID == "" {
src.ModifiedStatus = resources.ModifiedStatusCreated
}
}
for _, src := range config.Resources.RegisteredModels {
if src.ModifiedStatus == "" && src.ID == "" {
src.ModifiedStatus = resources.ModifiedStatusCreated
2022-12-15 14:12:47 +00:00
}
}
for _, src := range config.Resources.QualityMonitors {
if src.ModifiedStatus == "" && src.ID == "" {
src.ModifiedStatus = resources.ModifiedStatusCreated
}
}
for _, src := range config.Resources.Schemas {
if src.ModifiedStatus == "" && src.ID == "" {
src.ModifiedStatus = resources.ModifiedStatusCreated
}
}
Add DABs support for Unity Catalog volumes (#1762) ## Changes This PR adds support for UC volumes to DABs. ### Can I use a UC volume managed by DABs in `artifact_path`? Yes, but we require the volume to exist before being referenced in `artifact_path`. Otherwise you'll see an error that the volume does not exist. For this case, this PR also adds a warning if we detect that the UC volume is defined in the DAB itself, which informs the user to deploy the UC volume in a separate deployment first before using it in `artifact_path`. We cannot create the UC volume and then upload the artifacts to it in the same `bundle deploy` because `bundle deploy` always uploads the artifacts to `artifact_path` before materializing any resources defined in the bundle. Supporting this in a single deployment requires us to migrate away from our dependency on the Databricks Terraform provider to manage the CRUD lifecycle of DABs resources. ### Why do we not support `preset.name_prefix` for UC volumes? UC volumes will not have a `dev_shreyas_goenka` prefix added in `mode: development`. Configuring `presets.name_prefix` will be a no-op for UC volumes. We have decided not to support prefixing for UC resources. This is because: 1. UC provides its own namespace hierarchy that is independent of DABs. 2. Users can always manually use `${workspace.current_user.short_name}` to configure the prefixes manually. Customers often manually set up a UC hierarchy for dev and prod, including a schema or catalog per developer. Thus, it's often unnecessary for us to add prefixing in `mode: development` by default for UC resources. In retrospect, supporting prefixing for UC schemas and registered models was a mistake and will be removed in a future release of DABs. ## Tests Unit, integration test, and manually. ### Manual Testing cases: 1. UC volume does not exist: ``` ➜ bundle-playground git:(master) ✗ cli bundle deploy Error: failed to fetch metadata for the UC volume /Volumes/main/caps/my_volume that is configured in the artifact_path: Not Found ``` 2. UC Volume does not exist, but is defined in the DAB ``` ➜ bundle-playground git:(master) ✗ cli bundle deploy Error: failed to fetch metadata for the UC volume /Volumes/main/caps/managed_by_dab that is configured in the artifact_path: Not Found Warning: You might be using a UC volume in your artifact_path that is managed by this bundle but which has not been deployed yet. Please deploy the UC volume in a separate bundle deploy before using it in the artifact_path. at resources.volumes.bar in databricks.yml:24:7 ``` --------- Co-authored-by: Pieter Noordhuis <pieter.noordhuis@databricks.com>
2024-12-02 21:18:07 +00:00
for _, src := range config.Resources.Volumes {
if src.ModifiedStatus == "" && src.ID == "" {
src.ModifiedStatus = resources.ModifiedStatusCreated
}
}
for _, src := range config.Resources.Clusters {
if src.ModifiedStatus == "" && src.ID == "" {
src.ModifiedStatus = resources.ModifiedStatusCreated
}
}
for _, src := range config.Resources.Dashboards {
if src.ModifiedStatus == "" && src.ID == "" {
src.ModifiedStatus = resources.ModifiedStatusCreated
}
}
Added support for Databricks Apps in DABs (#1928) ## Changes Now it's possible to configure new `app` resource in bundle and point it to the custom `source_code_path` location where Databricks App code is defined. On `databricks bundle deploy` DABs will create an app. All consecutive `databricks bundle deploy` execution will update an existing app if there are any updated On `databricks bundle run <my_app>` DABs will execute app deployment. If the app is not started yet, it will start the app first. ### Bundle configuration ``` bundle: name: apps variables: my_job_id: description: "ID of job to run app" lookup: job: "My Job" databricks_name: description: "Name for app user" additional_flags: description: "Additional flags to run command app" default: "" my_app_config: type: complex description: "Configuration for my Databricks App" default: command: - flask - --app - hello - run - ${var.additional_flags} env: - name: DATABRICKS_NAME value: ${var.databricks_name} resources: apps: my_app: name: "anester-app" # required and has to be unique description: "My App" source_code_path: ./app # required and points to location of app code config: ${var.my_app_config} resources: - name: "my-job" description: "A job for app to be able to run" job: id: ${var.my_job_id} permission: "CAN_MANAGE_RUN" permissions: - user_name: "foo@bar.com" level: "CAN_VIEW" - service_principal_name: "my_sp" level: "CAN_MANAGE" targets: dev: variables: databricks_name: "Andrew (from dev)" additional_flags: --debug prod: variables: databricks_name: "Andrew (from prod)" ``` ### Execution 1. `databricks bundle deploy -t dev` 2. `databricks bundle run my_app -t dev` **If app is started** ``` ✓ Getting the status of the app my-app ✓ App is in RUNNING state ✓ Preparing source code for new app deployment. ✓ Deployment is pending ✓ Starting app with command: flask --app hello run --debug ✓ App started successfully You can access the app at <app-url> ``` **If app is not started** ``` ✓ Getting the status of the app my-app ✓ App is in UNAVAILABLE state ✓ Starting the app my-app ✓ App is starting... .... ✓ App is starting... ✓ App is started! ✓ Preparing source code for new app deployment. ✓ Downloading source code from /Workspace/Users/... ✓ Starting app with command: flask --app hello run --debug ✓ App started successfully You can access the app at <app-url> ``` ## Tests Added unit and config tests + manual test. ``` --- PASS: TestAccDeployBundleWithApp (404.59s) PASS coverage: 36.8% of statements in ./... ok github.com/databricks/cli/internal/bundle 405.035s coverage: 36.8% of statements in ./... ```
2025-01-13 16:43:48 +00:00
for _, src := range config.Resources.Apps {
if src.ModifiedStatus == "" {
src.ModifiedStatus = resources.ModifiedStatusCreated
}
}
2022-12-15 14:12:47 +00:00
return nil
}