databricks-cli/bundle/config/mutator/process_environment_mode.go

275 lines
7.7 KiB
Go
Raw Normal View History

<<<<<<< HEAD
package mutator
import (
"context"
"fmt"
2023-07-07 16:04:28 +00:00
"path"
2023-07-10 07:12:50 +00:00
"strings"
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config"
2023-07-10 07:12:50 +00:00
"github.com/databricks/databricks-sdk-go/apierr"
"github.com/databricks/databricks-sdk-go/service/iam"
2023-07-07 09:12:14 +00:00
"github.com/databricks/databricks-sdk-go/service/jobs"
"github.com/databricks/databricks-sdk-go/service/ml"
)
2023-07-10 07:21:14 +00:00
// processEnvironmentMode is the mutator that acts on the bundle's 'mode'
// setting. It holds no state; all inputs come from the bundle passed to Apply.
type processEnvironmentMode struct{}
2023-07-07 09:09:09 +00:00
// developmentConcurrentRuns is the MaxConcurrentRuns value assigned to a job
// in development mode when the job leaves MaxConcurrentRuns at 0.
const developmentConcurrentRuns = 4
2023-07-10 07:12:50 +00:00
// ProcessEnvironmentMode constructs the mutator that handles the bundle's
// 'mode' setting.
func ProcessEnvironmentMode() *processEnvironmentMode {
	return new(processEnvironmentMode)
}
// Name reports the identifier of this mutator.
func (m *processEnvironmentMode) Name() string {
	const mutatorName = "ProcessEnvironmentMode"
	return mutatorName
}
2023-07-03 14:30:42 +00:00
// Mark all resources as being for 'development' purposes, i.e.
// changing their their name, adding tags, and (in the future)
// marking them as 'hidden' in the UI.
2023-07-10 07:12:50 +00:00
func transformDevelopmentMode(b *bundle.Bundle) error {
r := b.Config.Resources
2023-07-10 07:12:50 +00:00
prefix := "[dev " + b.Config.Workspace.CurrentUser.ShortName + "] "
for i := range r.Jobs {
2023-07-10 07:12:50 +00:00
r.Jobs[i].Name = prefix + r.Jobs[i].Name
if r.Jobs[i].Tags == nil {
r.Jobs[i].Tags = make(map[string]string)
}
2023-07-10 07:12:50 +00:00
r.Jobs[i].Tags["dev"] = b.Config.Workspace.CurrentUser.DisplayName
if r.Jobs[i].MaxConcurrentRuns == 0 {
2023-07-07 09:09:09 +00:00
r.Jobs[i].MaxConcurrentRuns = developmentConcurrentRuns
}
2023-06-20 09:21:33 +00:00
if r.Jobs[i].Schedule != nil {
2023-07-07 09:12:14 +00:00
r.Jobs[i].Schedule.PauseStatus = jobs.PauseStatusPaused
2023-06-20 09:21:33 +00:00
}
if r.Jobs[i].Continuous != nil {
2023-07-07 09:12:14 +00:00
r.Jobs[i].Continuous.PauseStatus = jobs.PauseStatusPaused
2023-06-20 09:21:33 +00:00
}
if r.Jobs[i].Trigger != nil {
2023-07-07 09:12:14 +00:00
r.Jobs[i].Trigger.PauseStatus = jobs.PauseStatusPaused
2023-06-20 09:21:33 +00:00
}
}
for i := range r.Pipelines {
2023-07-10 07:12:50 +00:00
r.Pipelines[i].Name = prefix + r.Pipelines[i].Name
r.Pipelines[i].Development = true
// (pipelines don't yet support tags)
}
for i := range r.Models {
2023-07-10 07:12:50 +00:00
r.Models[i].Name = prefix + r.Models[i].Name
2023-07-03 14:30:42 +00:00
r.Models[i].Tags = append(r.Models[i].Tags, ml.ModelTag{Key: "dev", Value: ""})
}
for i := range r.Experiments {
2023-07-07 16:04:28 +00:00
filepath := r.Experiments[i].Name
dir := path.Dir(filepath)
base := path.Base(filepath)
if dir == "." {
2023-07-10 07:12:50 +00:00
r.Experiments[i].Name = prefix + base
} else {
2023-07-10 07:12:50 +00:00
r.Experiments[i].Name = dir + "/" + prefix + base
}
2023-07-10 07:12:50 +00:00
r.Experiments[i].Tags = append(r.Experiments[i].Tags, ml.ExperimentTag{Key: "dev", Value: b.Config.Workspace.CurrentUser.DisplayName})
}
return nil
}
2023-07-10 07:12:50 +00:00
// validateDevelopmentMode rejects a 'mode: development' bundle whose
// deployment paths are not specific to the current user.
//
// Note that isUserSpecificDeployment, despite its name, reports true when at
// least one deployment path does NOT contain the user name, so a true result
// is the failure case here.
func validateDevelopmentMode(b *bundle.Bundle) error {
	if !isUserSpecificDeployment(b) {
		return nil
	}
	return fmt.Errorf("environment with 'mode: development' must deploy to a location specific to the user, and should e.g. set 'root_path: ~/.bundle/${bundle.name}/${bundle.environment}'")
}
// isUserSpecificDeployment inspects the workspace state, artifacts and files
// paths of the bundle.
//
// NOTE: the result is inverted relative to the name. It returns true as soon
// as one of those paths does not contain the current user's UserName, and
// false only when all three contain it. Callers rely on this behaviour.
func isUserSpecificDeployment(b *bundle.Bundle) bool {
	userName := b.Config.Workspace.CurrentUser.UserName
	deployPaths := []string{
		b.Config.Workspace.StatePath,
		b.Config.Workspace.ArtifactsPath,
		b.Config.Workspace.FilesPath,
	}
	for _, p := range deployPaths {
		if !strings.Contains(p, userName) {
			return true
		}
	}
	return false
}
2023-07-10 07:21:14 +00:00
func validateProductionMode(ctx context.Context, b *bundle.Bundle, isPrincipalUsed bool) error {
2023-07-10 07:12:50 +00:00
if b.Config.Bundle.Git.Inferred {
TODO: show a nice human error here? :(
return fmt.Errorf("environment with 'mode: production' must specify an explicit 'git' configuration")
}
r := b.Config.Resources
for i := range r.Pipelines {
if r.Pipelines[i].Development {
return fmt.Errorf("environment with 'mode: production' cannot specify a pipeline with 'development: true'")
}
}
2023-07-10 07:21:14 +00:00
if !isPrincipalUsed {
2023-07-10 07:12:50 +00:00
if isUserSpecificDeployment(b) {
return fmt.Errorf("environment with 'mode: development' must deploy to a location specific to the user, and should e.g. set 'root_path: ~/.bundle/${bundle.name}/${bundle.environment}'")
}
if !arePermissionsSetExplicitly(r) {
return fmt.Errorf("environment with 'mode: production' must set permissions and run_as for all resources (when not using service principals)")
}
}
return nil
}
// Determines whether a service principal identity is used to run the CLI.
2023-07-10 07:21:14 +00:00
func isServicePrincipalUsed(ctx context.Context, b *bundle.Bundle) (bool, error) {
2023-07-10 07:12:50 +00:00
ws := b.WorkspaceClient()
2023-07-10 07:21:14 +00:00
_, err := ws.ServicePrincipals.GetById(ctx, b.Config.Workspace.CurrentUser.Id)
2023-07-10 07:12:50 +00:00
if err != nil {
apiError, ok := err.(*apierr.APIError)
if ok && apiError.StatusCode == 404 {
return false, nil
}
return false, err
}
return false, nil
}
// arePermissionsSetExplicitly determines whether permissions and run_as are
// explicitly set for all resources.
//
// We do this in a best-effort fashion; we may not actually test all resources,
// as we expect customers to use the top-level 'permissions' and 'run_as' fields.
// We'd rather not check for those specific fields though, as customers might
// set specific permissions instead!
//
// It returns false as soon as a pipeline without Permissions, or a job without
// Permissions or RunAs, is found, and true when no such resource exists
// (including when there are no pipelines or jobs at all).
func arePermissionsSetExplicitly(r config.Resources) bool {
	for i := range r.Pipelines {
		if r.Pipelines[i].Permissions == nil {
			return false
		}
	}
	for i := range r.Jobs {
		if r.Jobs[i].Permissions == nil {
			return false
		}
		if r.Jobs[i].RunAs == nil {
			return false
		}
	}
	// Every checked resource carries explicit settings.
	return true
}
func (m *processEnvironmentMode) Apply(ctx context.Context, b *bundle.Bundle) error {
switch b.Config.Bundle.Mode {
2023-07-03 14:30:42 +00:00
case config.Development:
2023-07-10 07:12:50 +00:00
err := validateDevelopmentMode(b)
if err != nil {
return err
}
return transformDevelopmentMode(b)
case config.Production:
2023-07-10 07:21:14 +00:00
isPrincipal, err := m.isServicePrincipalUsed(ctx, b)
if err != nil {
return err
}
return validateProductionMode(ctx, b, isPrincipal)
2023-07-03 14:30:42 +00:00
case "":
// No action
default:
return fmt.Errorf("unsupported value specified for 'mode': %s", b.Config.Bundle.Mode)
}
return nil
}
||||||| 3354750
=======
Add development runs (#522) This implements the "development run" functionality that we desire for DABs in the workspace / IDE. ## bundle.yml changes In bundle.yml, there should be a "dev" environment that is marked as `mode: development`: ``` environments: dev: default: true mode: development # future accepted values might include pull_request, production ``` Setting `mode` to `development` indicates that this environment is used just for running things for development. This results in several changes to deployed assets: * All assets will get '[dev]' in their name and will get a 'dev' tag * All assets will be hidden from the list of assets (future work; e.g. for jobs we would have a special job_type that hides it from the list) * All deployed assets will be ephemeral (future work, we need some form of garbage collection) * Pipelines will be marked as 'development: true' * Jobs can run on development compute through the `--compute` parameter in the CLI * Jobs get their schedule / triggers paused * Jobs get concurrent runs (it's really annoying if your runs get skipped because the last run was still in progress) Other accepted values for `mode` are `default` (which does nothing) and `pull-request` (which is reserved for future use). ## CLI changes To run a single job called "shark_sighting" on existing compute, use the following commands: ``` $ databricks bundle deploy --compute 0617-201942-9yd9g8ix $ databricks bundle run shark_sighting ``` which would deploy and run a job called "[dev] shark_sighting" on the compute provided. Note that `--compute` is not accepted in production environments, so we show an error if `mode: development` is not used. 
The `run --deploy` command offers a convenient shorthand for the common combination of deploying & running: ``` $ export DATABRICKS_COMPUTE=0617-201942-9yd9g8ix $ bundle run --deploy shark_sightings ``` The `--deploy` addition isn't really essential and I welcome feedback 🤔 I played with the idea of a "debug" or "dev" command but that seemed to only make the option space even broader for users. The above could work well with an IDE or workspace that automatically sets the target compute. One more thing I added is that `run --no-wait` can now be used to run something without waiting for it to be completed (useful for IDE-like environments that can display progress themselves). ``` $ bundle run --deploy shark_sightings --no-wait ```
2023-07-12 06:51:54 +00:00
package mutator
import (
"context"
"fmt"
"path"
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config"
"github.com/databricks/databricks-sdk-go/service/jobs"
"github.com/databricks/databricks-sdk-go/service/ml"
)
// processEnvironmentMode is the mutator that acts on the bundle's 'mode'
// setting. It holds no state; all inputs come from the bundle passed to Apply.
type processEnvironmentMode struct{}
// developmentConcurrentRuns is the MaxConcurrentRuns value assigned to a job
// in development mode when the job leaves MaxConcurrentRuns at 0.
const developmentConcurrentRuns = 4
// ProcessEnvironmentMode constructs the mutator that handles the bundle's
// 'mode' setting.
func ProcessEnvironmentMode() bundle.Mutator {
	return new(processEnvironmentMode)
}
// Name reports the identifier of this mutator.
func (m *processEnvironmentMode) Name() string {
	const mutatorName = "ProcessEnvironmentMode"
	return mutatorName
}
// processDevelopmentMode marks all resources as being for 'development'
// purposes, i.e. changing their name, adding tags, and (in the future)
// marking them as 'hidden' in the UI.
//
// Every resource name gains a "[dev] " prefix and jobs, models and experiments
// receive a "dev" tag with an empty value. The function always returns nil.
func processDevelopmentMode(b *bundle.Bundle) error {
	const devPrefix = "[dev] "
	res := b.Config.Resources

	// Jobs: rename, tag, allow concurrent runs and pause every kind of
	// automatic trigger.
	for key := range res.Jobs {
		res.Jobs[key].Name = devPrefix + res.Jobs[key].Name

		if res.Jobs[key].Tags == nil {
			res.Jobs[key].Tags = map[string]string{}
		}
		res.Jobs[key].Tags["dev"] = ""

		// Only fill in a concurrency limit when the job did not set one.
		if res.Jobs[key].MaxConcurrentRuns == 0 {
			res.Jobs[key].MaxConcurrentRuns = developmentConcurrentRuns
		}

		if res.Jobs[key].Schedule != nil {
			res.Jobs[key].Schedule.PauseStatus = jobs.PauseStatusPaused
		}
		if res.Jobs[key].Continuous != nil {
			res.Jobs[key].Continuous.PauseStatus = jobs.PauseStatusPaused
		}
		if res.Jobs[key].Trigger != nil {
			res.Jobs[key].Trigger.PauseStatus = jobs.PauseStatusPaused
		}
	}

	// Pipelines: rename and switch on development mode.
	// (pipelines don't yet support tags)
	for key := range res.Pipelines {
		res.Pipelines[key].Name = devPrefix + res.Pipelines[key].Name
		res.Pipelines[key].Development = true
	}

	// Models: rename and append a "dev" tag.
	for key := range res.Models {
		res.Models[key].Name = devPrefix + res.Models[key].Name
		devTag := ml.ModelTag{Key: "dev", Value: ""}
		res.Models[key].Tags = append(res.Models[key].Tags, devTag)
	}

	// Experiments: their name is a slash-separated path, so the prefix goes
	// on the last path element rather than on the whole string.
	for key := range res.Experiments {
		full := res.Experiments[key].Name
		parent, leaf := path.Dir(full), path.Base(full)

		renamed := devPrefix + leaf
		if parent != "." {
			renamed = parent + "/" + renamed
		}
		res.Experiments[key].Name = renamed

		devTag := ml.ExperimentTag{Key: "dev", Value: ""}
		res.Experiments[key].Tags = append(res.Experiments[key].Tags, devTag)
	}

	return nil
}
// Apply acts on the bundle's configured 'mode': development mode rewrites the
// resources via processDevelopmentMode, an empty mode does nothing, and any
// other value is reported as an error.
func (m *processEnvironmentMode) Apply(ctx context.Context, b *bundle.Bundle) error {
	mode := b.Config.Bundle.Mode

	switch mode {
	case "":
		// No action
		return nil
	case config.Development:
		return processDevelopmentMode(b)
	}

	return fmt.Errorf("unsupported value specified for 'mode': %s", mode)
}
>>>>>>> databricks/main