databricks-cli/cmd/bundle/run.go

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

211 lines
5.5 KiB
Go
Raw Normal View History

2022-12-15 14:12:47 +00:00
package bundle
import (
"context"
"encoding/json"
"errors"
"fmt"
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/deploy/terraform"
"github.com/databricks/cli/bundle/phases"
"github.com/databricks/cli/bundle/resources"
"github.com/databricks/cli/bundle/run"
"github.com/databricks/cli/bundle/run/output"
"github.com/databricks/cli/cmd/bundle/utils"
"github.com/databricks/cli/cmd/root"
"github.com/databricks/cli/libs/cmdio"
"github.com/databricks/cli/libs/flags"
2022-12-15 14:12:47 +00:00
"github.com/spf13/cobra"
"golang.org/x/exp/maps"
2022-12-15 14:12:47 +00:00
)
func promptRunArgument(ctx context.Context, b *bundle.Bundle) (string, error) {
// Compute map of "Human readable name of resource" -> "resource key".
inv := make(map[string]string)
for k, ref := range resources.Completions(b, run.IsRunnable) {
title := fmt.Sprintf("%s: %s", ref.Description.SingularTitle, ref.Resource.GetName())
inv[title] = k
}
key, err := cmdio.Select(ctx, inv, "Resource to run")
if err != nil {
return "", err
}
return key, nil
}
// resolveRunArgument resolves the resource key to run.
// It returns the remaining arguments to pass to the runner, if applicable.
func resolveRunArgument(ctx context.Context, b *bundle.Bundle, args []string) (string, []string, error) {
// If no arguments are specified, prompt the user to select something to run.
if len(args) == 0 && cmdio.IsPromptSupported(ctx) {
key, err := promptRunArgument(ctx, b)
if err != nil {
return "", nil, err
}
return key, args, nil
}
if len(args) < 1 {
return "", nil, errors.New("expected a KEY of the resource to run")
}
return args[0], args[1:], nil
}
func keyToRunner(b *bundle.Bundle, arg string) (run.Runner, error) {
// Locate the resource to run.
ref, err := resources.Lookup(b, arg, run.IsRunnable)
if err != nil {
return nil, err
}
// Convert the resource to a runnable resource.
runner, err := run.ToRunner(b, ref)
if err != nil {
return nil, err
}
return runner, nil
}
func newRunCommand() *cobra.Command {
cmd := &cobra.Command{
Use: "run [flags] KEY",
Short: "Run a job or pipeline update",
Long: `Run the job or pipeline identified by KEY.
The KEY is the unique identifier of the resource to run. In addition to
customizing the run using any of the available flags, you can also specify
keyword or positional arguments as shown in these examples:
databricks bundle run my_job -- --key1 value1 --key2 value2
Or:
databricks bundle run my_job -- value1 value2 value3
If the specified job uses job parameters or the job has a notebook task with
parameters, the first example applies and flag names are mapped to the
parameter names.
If the specified job does not use job parameters and the job has a Python file
task or a Python wheel task, the second example applies.
`,
}
2022-12-15 14:12:47 +00:00
var runOptions run.Options
Group bundle run flags by job and pipeline types (#1174) ## Changes Group bundle run flags by job and pipeline types ## Tests ``` Run a resource (e.g. a job or a pipeline) Usage: databricks bundle run [flags] KEY Job Flags: --dbt-commands strings A list of commands to execute for jobs with DBT tasks. --jar-params strings A list of parameters for jobs with Spark JAR tasks. --notebook-params stringToString A map from keys to values for jobs with notebook tasks. (default []) --params stringToString comma separated k=v pairs for job parameters (default []) --pipeline-params stringToString A map from keys to values for jobs with pipeline tasks. (default []) --python-named-params stringToString A map from keys to values for jobs with Python wheel tasks. (default []) --python-params strings A list of parameters for jobs with Python tasks. --spark-submit-params strings A list of parameters for jobs with Spark submit tasks. --sql-params stringToString A map from keys to values for jobs with SQL tasks. (default []) Pipeline Flags: --full-refresh strings List of tables to reset and recompute. --full-refresh-all Perform a full graph reset and recompute. --refresh strings List of tables to update. --refresh-all Perform a full graph update. Flags: -h, --help help for run --no-wait Don't wait for the run to complete. Global Flags: --debug enable debug logging -o, --output type output type: text or json (default text) -p, --profile string ~/.databrickscfg profile -t, --target string bundle target to use (if applicable) --var strings set values for variables defined in bundle config. Example: --var="foo=bar" ```
2024-02-06 14:51:02 +00:00
runOptions.Define(cmd)
var noWait bool
var restart bool
cmd.Flags().BoolVar(&noWait, "no-wait", false, "Don't wait for the run to complete.")
cmd.Flags().BoolVar(&restart, "restart", false, "Restart the run if it is already running.")
cmd.RunE = func(cmd *cobra.Command, args []string) error {
ctx := cmd.Context()
b, diags := utils.ConfigureBundleWithVariables(cmd)
if err := diags.Error(); err != nil {
return diags.Error()
}
Add development runs (#522) This implements the "development run" functionality that we desire for DABs in the workspace / IDE. ## bundle.yml changes In bundle.yml, there should be a "dev" environment that is marked as `mode: debug`: ``` environments: dev: default: true mode: development # future accepted values might include pull_request, production ``` Setting `mode` to `development` indicates that this environment is used just for running things for development. This results in several changes to deployed assets: * All assets will get '[dev]' in their name and will get a 'dev' tag * All assets will be hidden from the list of assets (future work; e.g. for jobs we would have a special job_type that hides it from the list) * All deployed assets will be ephemeral (future work, we need some form of garbage collection) * Pipelines will be marked as 'development: true' * Jobs can run on development compute through the `--compute` parameter in the CLI * Jobs get their schedule / triggers paused * Jobs get concurrent runs (it's really annoying if your runs get skipped because the last run was still in progress) Other accepted values for `mode` are `default` (which does nothing) and `pull-request` (which is reserved for future use). ## CLI changes To run a single job called "shark_sighting" on existing compute, use the following commands: ``` $ databricks bundle deploy --compute 0617-201942-9yd9g8ix $ databricks bundle run shark_sighting ``` which would deploy and run a job called "[dev] shark_sightings" on the compute provided. Note that `--compute` is not accepted in production environments, so we show an error if `mode: development` is not used. The `run --deploy` command offers a convenient shorthand for the common combination of deploying & running: ``` $ export DATABRICKS_COMPUTE=0617-201942-9yd9g8ix $ bundle run --deploy shark_sightings ``` The `--deploy` addition isn't really essential and I welcome feedback 🤔 I played with the idea of a "debug" or "dev" command but that seemed to only make the option space even broader for users. The above could work well with an IDE or workspace that automatically sets the target compute. One more thing I added is`run --no-wait` can now be used to run something without waiting for it to be completed (useful for IDE-like environments that can display progress themselves). ``` $ bundle run --deploy shark_sightings --no-wait ```
2023-07-12 06:51:54 +00:00
diags = bundle.Apply(ctx, b, phases.Initialize())
if err := diags.Error(); err != nil {
2022-12-15 14:12:47 +00:00
return err
}
key, args, err := resolveRunArgument(ctx, b, args)
if err != nil {
return err
}
diags = bundle.Apply(ctx, b, bundle.Seq(
terraform.Interpolate(),
terraform.Write(),
terraform.StatePull(),
terraform.Load(terraform.ErrorOnEmptyState),
))
if err := diags.Error(); err != nil {
return err
}
runner, err := keyToRunner(b, key)
2022-12-15 14:12:47 +00:00
if err != nil {
return err
}
// Parse additional positional arguments.
err = runner.ParseArgs(args, &runOptions)
if err != nil {
return err
}
Add development runs (#522) This implements the "development run" functionality that we desire for DABs in the workspace / IDE. ## bundle.yml changes In bundle.yml, there should be a "dev" environment that is marked as `mode: debug`: ``` environments: dev: default: true mode: development # future accepted values might include pull_request, production ``` Setting `mode` to `development` indicates that this environment is used just for running things for development. This results in several changes to deployed assets: * All assets will get '[dev]' in their name and will get a 'dev' tag * All assets will be hidden from the list of assets (future work; e.g. for jobs we would have a special job_type that hides it from the list) * All deployed assets will be ephemeral (future work, we need some form of garbage collection) * Pipelines will be marked as 'development: true' * Jobs can run on development compute through the `--compute` parameter in the CLI * Jobs get their schedule / triggers paused * Jobs get concurrent runs (it's really annoying if your runs get skipped because the last run was still in progress) Other accepted values for `mode` are `default` (which does nothing) and `pull-request` (which is reserved for future use). ## CLI changes To run a single job called "shark_sighting" on existing compute, use the following commands: ``` $ databricks bundle deploy --compute 0617-201942-9yd9g8ix $ databricks bundle run shark_sighting ``` which would deploy and run a job called "[dev] shark_sightings" on the compute provided. Note that `--compute` is not accepted in production environments, so we show an error if `mode: development` is not used. The `run --deploy` command offers a convenient shorthand for the common combination of deploying & running: ``` $ export DATABRICKS_COMPUTE=0617-201942-9yd9g8ix $ bundle run --deploy shark_sightings ``` The `--deploy` addition isn't really essential and I welcome feedback 🤔 I played with the idea of a "debug" or "dev" command but that seemed to only make the option space even broader for users. The above could work well with an IDE or workspace that automatically sets the target compute. One more thing I added is`run --no-wait` can now be used to run something without waiting for it to be completed (useful for IDE-like environments that can display progress themselves). ``` $ bundle run --deploy shark_sightings --no-wait ```
2023-07-12 06:51:54 +00:00
runOptions.NoWait = noWait
var output output.RunOutput
if restart {
output, err = runner.Restart(ctx, &runOptions)
} else {
output, err = runner.Run(ctx, &runOptions)
}
if err != nil {
return err
2022-12-15 14:12:47 +00:00
}
if output != nil {
switch root.OutputType(cmd) {
case flags.OutputText:
resultString, err := output.String()
if err != nil {
return err
}
_, err = cmd.OutOrStdout().Write([]byte(resultString))
if err != nil {
return err
}
case flags.OutputJSON:
b, err := json.MarshalIndent(output, "", " ")
if err != nil {
return err
}
_, err = cmd.OutOrStdout().Write(b)
if err != nil {
return err
}
default:
return fmt.Errorf("unknown output type %s", root.OutputType(cmd))
}
}
2022-12-15 14:12:47 +00:00
return nil
}
cmd.ValidArgsFunction = func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) {
b, diags := root.MustConfigureBundle(cmd)
if err := diags.Error(); err != nil {
cobra.CompErrorln(err.Error())
return nil, cobra.ShellCompDirectiveError
}
// No completion in the context of a bundle.
// Source and destination paths are taken from bundle configuration.
if b == nil {
return nil, cobra.ShellCompDirectiveNoFileComp
}
if len(args) == 0 {
completions := resources.Completions(b, run.IsRunnable)
return maps.Keys(completions), cobra.ShellCompDirectiveNoFileComp
} else {
// If we know the resource to run, we can complete additional positional arguments.
runner, err := keyToRunner(b, args[0])
if err != nil {
return nil, cobra.ShellCompDirectiveError
}
return runner.CompleteArgs(args[1:], toComplete)
}
}
2022-12-15 14:12:47 +00:00
return cmd
2022-12-15 14:12:47 +00:00
}