2022-12-15 14:12:47 +00:00
|
|
|
package run
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
"fmt"
|
|
|
|
"strconv"
|
|
|
|
"time"
|
|
|
|
|
|
|
|
"github.com/databricks/bricks/bundle"
|
|
|
|
"github.com/databricks/bricks/bundle/config/resources"
|
2023-04-14 12:40:34 +00:00
|
|
|
"github.com/databricks/bricks/bundle/run/output"
|
|
|
|
"github.com/databricks/bricks/bundle/run/progress"
|
2023-04-06 10:54:58 +00:00
|
|
|
"github.com/databricks/bricks/libs/cmdio"
|
2023-03-17 14:17:31 +00:00
|
|
|
"github.com/databricks/bricks/libs/log"
|
2022-12-15 14:12:47 +00:00
|
|
|
"github.com/databricks/databricks-sdk-go/retries"
|
|
|
|
"github.com/databricks/databricks-sdk-go/service/jobs"
|
2023-02-20 22:40:14 +00:00
|
|
|
"github.com/fatih/color"
|
2022-12-23 14:17:16 +00:00
|
|
|
flag "github.com/spf13/pflag"
|
2022-12-15 14:12:47 +00:00
|
|
|
)
|
|
|
|
|
2022-12-23 14:17:16 +00:00
|
|
|
// JobOptions holds the per-task-type parameters that can be supplied when
// triggering a job run.
//
// The fields are populated from command-line flags registered by Define and
// are copied into a jobs.RunNow request by toPayload.
type JobOptions struct {
	// List-valued parameters: --dbt-commands and --jar-params.
	dbtCommands, jarParams []string

	// Key/value parameters: --notebook-params, --pipeline-params and
	// --python-named-params.
	notebookParams, pipelineParams, pythonNamedParams map[string]string

	// List-valued parameters: --python-params and --spark-submit-params.
	pythonParams, sparkSubmitParams []string

	// Key/value parameters: --sql-params.
	sqlParams map[string]string
}
|
|
|
|
|
|
|
|
func (o *JobOptions) Define(fs *flag.FlagSet) {
|
|
|
|
fs.StringSliceVar(&o.dbtCommands, "dbt-commands", nil, "A list of commands to execute for jobs with DBT tasks.")
|
|
|
|
fs.StringSliceVar(&o.jarParams, "jar-params", nil, "A list of parameters for jobs with Spark JAR tasks.")
|
|
|
|
fs.StringToStringVar(&o.notebookParams, "notebook-params", nil, "A map from keys to values for jobs with notebook tasks.")
|
|
|
|
fs.StringToStringVar(&o.pipelineParams, "pipeline-params", nil, "A map from keys to values for jobs with pipeline tasks.")
|
|
|
|
fs.StringToStringVar(&o.pythonNamedParams, "python-named-params", nil, "A map from keys to values for jobs with Python wheel tasks.")
|
|
|
|
fs.StringSliceVar(&o.pythonParams, "python-params", nil, "A list of parameters for jobs with Python tasks.")
|
|
|
|
fs.StringSliceVar(&o.sparkSubmitParams, "spark-submit-params", nil, "A list of parameters for jobs with Spark submit tasks.")
|
|
|
|
fs.StringToStringVar(&o.sqlParams, "sql-params", nil, "A map from keys to values for jobs with SQL tasks.")
|
|
|
|
}
|
|
|
|
|
|
|
|
func (o *JobOptions) validatePipelineParams() (*jobs.PipelineParams, error) {
|
|
|
|
if len(o.pipelineParams) == 0 {
|
|
|
|
return nil, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
var defaultErr = fmt.Errorf("job run argument --pipeline-params only supports `full_refresh=<bool>`")
|
|
|
|
v, ok := o.pipelineParams["full_refresh"]
|
|
|
|
if !ok {
|
|
|
|
return nil, defaultErr
|
|
|
|
}
|
|
|
|
|
|
|
|
b, err := strconv.ParseBool(v)
|
|
|
|
if err != nil {
|
|
|
|
return nil, defaultErr
|
|
|
|
}
|
|
|
|
|
|
|
|
pipelineParams := &jobs.PipelineParams{
|
|
|
|
FullRefresh: b,
|
|
|
|
}
|
|
|
|
|
|
|
|
return pipelineParams, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (o *JobOptions) toPayload(jobID int64) (*jobs.RunNow, error) {
|
|
|
|
pipelineParams, err := o.validatePipelineParams()
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
payload := &jobs.RunNow{
|
|
|
|
JobId: jobID,
|
|
|
|
|
|
|
|
DbtCommands: o.dbtCommands,
|
|
|
|
JarParams: o.jarParams,
|
|
|
|
NotebookParams: o.notebookParams,
|
|
|
|
PipelineParams: pipelineParams,
|
|
|
|
PythonNamedParams: o.pythonNamedParams,
|
|
|
|
PythonParams: o.pythonParams,
|
|
|
|
SparkSubmitParams: o.sparkSubmitParams,
|
|
|
|
SqlParams: o.sqlParams,
|
|
|
|
}
|
|
|
|
|
|
|
|
return payload, nil
|
|
|
|
}
|
|
|
|
|
2022-12-15 14:12:47 +00:00
|
|
|
// jobRunTimeout is the default upper bound on how long to wait for a job run
// to complete.
var jobRunTimeout = 2 * time.Hour
|
|
|
|
|
|
|
|
type jobRunner struct {
|
|
|
|
key
|
|
|
|
|
|
|
|
bundle *bundle.Bundle
|
|
|
|
job *resources.Job
|
|
|
|
}
|
|
|
|
|
2023-02-20 22:40:14 +00:00
|
|
|
func isFailed(task jobs.RunTask) bool {
|
|
|
|
return task.State.LifeCycleState == jobs.RunLifeCycleStateInternalError ||
|
|
|
|
(task.State.LifeCycleState == jobs.RunLifeCycleStateTerminated &&
|
|
|
|
task.State.ResultState == jobs.RunResultStateFailed)
|
|
|
|
}
|
|
|
|
|
|
|
|
func isSuccess(task jobs.RunTask) bool {
|
|
|
|
return task.State.LifeCycleState == jobs.RunLifeCycleStateTerminated &&
|
|
|
|
task.State.ResultState == jobs.RunResultStateSuccess
|
|
|
|
}
|
|
|
|
|
|
|
|
func (r *jobRunner) logFailedTasks(ctx context.Context, runId int64) {
|
|
|
|
w := r.bundle.WorkspaceClient()
|
|
|
|
red := color.New(color.FgRed).SprintFunc()
|
|
|
|
green := color.New(color.FgGreen).SprintFunc()
|
|
|
|
yellow := color.New(color.FgYellow).SprintFunc()
|
|
|
|
run, err := w.Jobs.GetRun(ctx, jobs.GetRun{
|
|
|
|
RunId: runId,
|
|
|
|
})
|
|
|
|
if err != nil {
|
2023-03-17 14:17:31 +00:00
|
|
|
log.Errorf(ctx, "failed to log job run. Error: %s", err)
|
2023-02-20 22:40:14 +00:00
|
|
|
return
|
|
|
|
}
|
|
|
|
if run.State.ResultState == jobs.RunResultStateSuccess {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
for _, task := range run.Tasks {
|
|
|
|
if isSuccess(task) {
|
2023-03-17 14:17:31 +00:00
|
|
|
log.Infof(ctx, "task %s completed successfully", green(task.TaskKey))
|
2023-02-20 22:40:14 +00:00
|
|
|
} else if isFailed(task) {
|
|
|
|
taskInfo, err := w.Jobs.GetRunOutput(ctx, jobs.GetRunOutput{
|
|
|
|
RunId: task.RunId,
|
|
|
|
})
|
|
|
|
if err != nil {
|
2023-03-17 14:17:31 +00:00
|
|
|
log.Errorf(ctx, "task %s failed. Unable to fetch error trace: %s", red(task.TaskKey), err)
|
2023-02-20 22:40:14 +00:00
|
|
|
continue
|
|
|
|
}
|
2023-04-18 12:58:20 +00:00
|
|
|
if progressLogger, ok := cmdio.FromContext(ctx); ok {
|
|
|
|
progressLogger.Log(progress.NewTaskErrorEvent(task.TaskKey, taskInfo.Error, taskInfo.ErrorTrace))
|
|
|
|
}
|
2023-03-17 14:17:31 +00:00
|
|
|
log.Errorf(ctx, "Task %s failed!\nError:\n%s\nTrace:\n%s",
|
2023-02-20 22:40:14 +00:00
|
|
|
red(task.TaskKey), taskInfo.Error, taskInfo.ErrorTrace)
|
|
|
|
} else {
|
2023-03-17 14:17:31 +00:00
|
|
|
log.Infof(ctx, "task %s is in state %s",
|
2023-02-20 22:40:14 +00:00
|
|
|
yellow(task.TaskKey), task.State.LifeCycleState)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-03-29 12:58:09 +00:00
|
|
|
func pullRunIdCallback(runId *int64) func(info *retries.Info[jobs.Run]) {
|
|
|
|
return func(info *retries.Info[jobs.Run]) {
|
|
|
|
i := info.Info
|
|
|
|
if i == nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
if *runId == 0 {
|
|
|
|
*runId = i.RunId
|
|
|
|
}
|
2022-12-15 14:12:47 +00:00
|
|
|
}
|
2023-03-29 12:58:09 +00:00
|
|
|
}
|
2022-12-15 14:12:47 +00:00
|
|
|
|
2023-03-29 12:58:09 +00:00
|
|
|
func logDebugCallback(ctx context.Context, runId *int64) func(info *retries.Info[jobs.Run]) {
|
2022-12-15 14:12:47 +00:00
|
|
|
var prevState *jobs.RunState
|
2023-03-29 12:58:09 +00:00
|
|
|
return func(info *retries.Info[jobs.Run]) {
|
2023-01-12 17:58:36 +00:00
|
|
|
i := info.Info
|
|
|
|
if i == nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
state := i.State
|
2022-12-15 14:12:47 +00:00
|
|
|
if state == nil {
|
|
|
|
return
|
|
|
|
}
|
2023-01-12 17:58:36 +00:00
|
|
|
|
2022-12-15 14:12:47 +00:00
|
|
|
// Log the job run URL as soon as it is available.
|
|
|
|
if prevState == nil {
|
2023-03-17 14:17:31 +00:00
|
|
|
log.Infof(ctx, "Run available at %s", info.Info.RunPageUrl)
|
2022-12-15 14:12:47 +00:00
|
|
|
}
|
|
|
|
if prevState == nil || prevState.LifeCycleState != state.LifeCycleState {
|
2023-03-17 14:17:31 +00:00
|
|
|
log.Infof(ctx, "Run status: %s", info.Info.State.LifeCycleState)
|
2022-12-15 14:12:47 +00:00
|
|
|
prevState = state
|
|
|
|
}
|
2023-03-29 12:58:09 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-04-06 10:54:58 +00:00
|
|
|
func logProgressCallback(ctx context.Context, progressLogger *cmdio.Logger) func(info *retries.Info[jobs.Run]) {
|
2023-03-29 12:58:09 +00:00
|
|
|
var prevState *jobs.RunState
|
|
|
|
return func(info *retries.Info[jobs.Run]) {
|
|
|
|
i := info.Info
|
|
|
|
if i == nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
state := i.State
|
|
|
|
if state == nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2023-04-18 12:40:45 +00:00
|
|
|
if prevState == nil {
|
|
|
|
progressLogger.Log(progress.NewJobRunUrlEvent(i.RunPageUrl))
|
|
|
|
}
|
|
|
|
|
2023-03-29 12:58:09 +00:00
|
|
|
if prevState != nil && prevState.LifeCycleState == state.LifeCycleState &&
|
|
|
|
prevState.ResultState == state.ResultState {
|
|
|
|
return
|
|
|
|
} else {
|
|
|
|
prevState = state
|
|
|
|
}
|
|
|
|
|
2023-04-14 12:40:34 +00:00
|
|
|
event := &progress.JobProgressEvent{
|
2023-04-18 12:40:45 +00:00
|
|
|
Timestamp: time.Now(),
|
|
|
|
JobId: i.JobId,
|
|
|
|
RunId: i.RunId,
|
|
|
|
RunName: i.RunName,
|
|
|
|
State: *i.State,
|
2023-02-20 22:40:14 +00:00
|
|
|
}
|
2023-03-29 12:58:09 +00:00
|
|
|
|
|
|
|
// log progress events to stderr
|
|
|
|
progressLogger.Log(event)
|
|
|
|
|
|
|
|
// log progress events in using the default logger
|
|
|
|
log.Infof(ctx, event.String())
|
2022-12-15 14:12:47 +00:00
|
|
|
}
|
2023-03-29 12:58:09 +00:00
|
|
|
}
|
2022-12-15 14:12:47 +00:00
|
|
|
|
2023-04-14 12:40:34 +00:00
|
|
|
func (r *jobRunner) Run(ctx context.Context, opts *Options) (output.RunOutput, error) {
|
2023-03-29 12:58:09 +00:00
|
|
|
jobID, err := strconv.ParseInt(r.job.ID, 10, 64)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("job ID is not an integer: %s", r.job.ID)
|
|
|
|
}
|
|
|
|
|
|
|
|
runId := new(int64)
|
|
|
|
|
|
|
|
// construct request payload from cmd line flags args
|
2022-12-23 14:17:16 +00:00
|
|
|
req, err := opts.Job.toPayload(jobID)
|
|
|
|
if err != nil {
|
2023-03-21 15:25:18 +00:00
|
|
|
return nil, err
|
2022-12-23 14:17:16 +00:00
|
|
|
}
|
|
|
|
|
2023-03-17 14:17:31 +00:00
|
|
|
// Include resource key in logger.
|
|
|
|
ctx = log.NewContext(ctx, log.GetLogger(ctx).With("resource", r.Key()))
|
2023-03-29 12:58:09 +00:00
|
|
|
|
2022-12-15 14:12:47 +00:00
|
|
|
w := r.bundle.WorkspaceClient()
|
2023-03-29 12:58:09 +00:00
|
|
|
|
|
|
|
// gets the run id from inside Jobs.RunNowAndWait
|
|
|
|
pullRunId := pullRunIdCallback(runId)
|
|
|
|
|
|
|
|
// callback to log status updates to the universal log destination.
|
|
|
|
// Called on every poll request
|
|
|
|
logDebug := logDebugCallback(ctx, runId)
|
|
|
|
|
|
|
|
// callback to log progress events. Called on every poll request
|
2023-04-06 10:54:58 +00:00
|
|
|
progressLogger, ok := cmdio.FromContext(ctx)
|
2023-03-29 12:58:09 +00:00
|
|
|
if !ok {
|
|
|
|
return nil, fmt.Errorf("no progress logger found")
|
|
|
|
}
|
|
|
|
logProgress := logProgressCallback(ctx, progressLogger)
|
|
|
|
|
|
|
|
run, err := w.Jobs.RunNowAndWait(ctx, *req,
|
|
|
|
retries.Timeout[jobs.Run](jobRunTimeout), pullRunId, logDebug, logProgress)
|
2023-02-20 22:40:14 +00:00
|
|
|
if err != nil && runId != nil {
|
|
|
|
r.logFailedTasks(ctx, *runId)
|
|
|
|
}
|
2022-12-15 14:12:47 +00:00
|
|
|
if err != nil {
|
2023-03-21 15:25:18 +00:00
|
|
|
return nil, err
|
2022-12-15 14:12:47 +00:00
|
|
|
}
|
2023-03-21 12:17:15 +00:00
|
|
|
if run.State.LifeCycleState == jobs.RunLifeCycleStateSkipped {
|
|
|
|
log.Infof(ctx, "Run was skipped!")
|
2023-03-21 15:25:18 +00:00
|
|
|
return nil, fmt.Errorf("run skipped: %s", run.State.StateMessage)
|
2023-03-21 12:17:15 +00:00
|
|
|
}
|
2022-12-15 14:12:47 +00:00
|
|
|
|
|
|
|
switch run.State.ResultState {
|
|
|
|
// The run was canceled at user request.
|
|
|
|
case jobs.RunResultStateCanceled:
|
2023-03-17 14:17:31 +00:00
|
|
|
log.Infof(ctx, "Run was cancelled!")
|
2023-03-21 15:25:18 +00:00
|
|
|
return nil, fmt.Errorf("run canceled: %s", run.State.StateMessage)
|
2022-12-15 14:12:47 +00:00
|
|
|
|
|
|
|
// The task completed with an error.
|
|
|
|
case jobs.RunResultStateFailed:
|
2023-03-17 14:17:31 +00:00
|
|
|
log.Infof(ctx, "Run has failed!")
|
2023-03-21 15:25:18 +00:00
|
|
|
return nil, fmt.Errorf("run failed: %s", run.State.StateMessage)
|
2022-12-15 14:12:47 +00:00
|
|
|
|
|
|
|
// The task completed successfully.
|
|
|
|
case jobs.RunResultStateSuccess:
|
2023-03-17 14:17:31 +00:00
|
|
|
log.Infof(ctx, "Run has completed successfully!")
|
2023-04-14 12:40:34 +00:00
|
|
|
return output.GetJobOutput(ctx, r.bundle.WorkspaceClient(), *runId)
|
2022-12-15 14:12:47 +00:00
|
|
|
|
|
|
|
// The run was stopped after reaching the timeout.
|
|
|
|
case jobs.RunResultStateTimedout:
|
2023-03-17 14:17:31 +00:00
|
|
|
log.Infof(ctx, "Run has timed out!")
|
2023-03-21 15:25:18 +00:00
|
|
|
return nil, fmt.Errorf("run timed out: %s", run.State.StateMessage)
|
2022-12-15 14:12:47 +00:00
|
|
|
}
|
|
|
|
|
2023-03-21 15:25:18 +00:00
|
|
|
return nil, err
|
2022-12-15 14:12:47 +00:00
|
|
|
}
|