Show detailed error logs for jobs (#209)

This PR shows how errors for jobs are rendered on the console.
Here is the bundle used for the logs below:
```
bundle:
  name: deco-438

workspace:
  host: https://adb-309687753508875.15.azuredatabricks.net

resources:
  jobs:
    foo:
      name: "[${bundle.name}][${bundle.environment}] a test notebook"

      tasks:
        - task_key: alpha
          existing_cluster_id: 1109-115254-ox7poobk
          notebook_task:
            notebook_path: "/Users/shreyas.goenka@databricks.com/[deco-438] invalid notebook"
        - task_key: beta
          existing_cluster_id: 1109-115254-ox7poobk
          notebook_task:
            notebook_path: "/does-not-exist"
        - task_key: gamma
          existing_cluster_id: 1109-115254-ox7poobk
          notebook_task:
            notebook_path: "/Users/shreyas.goenka@databricks.com/[deco-438] valid notebook"
```

And this is a screenshot of the logs from the console:
<img width="1057" alt="Screenshot 2023-02-17 at 7 12 29 PM"
src="https://user-images.githubusercontent.com/88374338/219744768-ab7f1e79-db8f-466a-ad6d-f2b6f85ed17c.png">

Here are the logs when only task gamma is executed (successfully):
<img width="1059" alt="Screenshot 2023-02-17 at 7 13 04 PM"
src="https://user-images.githubusercontent.com/88374338/219744992-011d8b91-ec1d-44f0-a849-83c81816dd9f.png">


TODO: Investigate other possible job errors, and make sure their state
is handled in a robust way here.
This commit is contained in:
shreyas-goenka 2023-02-20 23:40:14 +01:00 committed by GitHub
parent ae9d6883ee
commit f93b541b63
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 71 additions and 0 deletions

View File

@ -11,6 +11,7 @@ import (
"github.com/databricks/bricks/bundle/config/resources"
"github.com/databricks/databricks-sdk-go/retries"
"github.com/databricks/databricks-sdk-go/service/jobs"
"github.com/fatih/color"
flag "github.com/spf13/pflag"
)
@ -92,6 +93,56 @@ type jobRunner struct {
job *resources.Job
}
// isFailed reports whether the given task run should be treated as failed.
//
// A task counts as failed when its life cycle state is an internal error, or
// when it has terminated and its result state is "failed". Every other
// combination of states yields false.
func isFailed(task jobs.RunTask) bool {
	state := task.State
	switch state.LifeCycleState {
	case jobs.RunLifeCycleStateInternalError:
		// Internal errors are failures regardless of the result state.
		return true
	case jobs.RunLifeCycleStateTerminated:
		return state.ResultState == jobs.RunResultStateFailed
	default:
		return false
	}
}
// isSuccess reports whether the given task run finished successfully, i.e. its
// life cycle state is "terminated" and its result state is "success".
func isSuccess(task jobs.RunTask) bool {
	if task.State.LifeCycleState != jobs.RunLifeCycleStateTerminated {
		return false
	}
	return task.State.ResultState == jobs.RunResultStateSuccess
}
// logFailedTasks fetches the run identified by runId and logs a per-task
// summary of its outcome. It is best-effort: failures to fetch the run or a
// task's output are logged rather than returned.
//
// Nothing is logged when the run's result state is success. Otherwise, for
// each task of the run:
//   - a successful task is logged with the info prefix and its key in green;
//   - a failed task is logged with the error prefix together with the error
//     and error trace returned by GetRunOutput (or a fallback message if that
//     call fails);
//   - a task without state information is reported as such;
//   - any other task is logged with its current life cycle state.
func (r *jobRunner) logFailedTasks(ctx context.Context, runId int64) {
	w := r.bundle.WorkspaceClient()

	red := color.New(color.FgRed).SprintFunc()
	green := color.New(color.FgGreen).SprintFunc()
	yellow := color.New(color.FgYellow).SprintFunc()

	errorPrefix := fmt.Sprintf("%s [%s]", red("[ERROR]"), r.Key())
	infoPrefix := fmt.Sprintf("%s [%s]", "[INFO]", r.Key())

	run, err := w.Jobs.GetRun(ctx, jobs.GetRun{
		RunId: runId,
	})
	if err != nil {
		log.Printf("%s failed to log job run. Error: %s", errorPrefix, err)
		return
	}

	// The run state is a pointer (Run tracks it as *jobs.RunState), so guard
	// against a response that carries no state before dereferencing it.
	if run.State != nil && run.State.ResultState == jobs.RunResultStateSuccess {
		return
	}

	for _, task := range run.Tasks {
		// NOTE(review): assumes RunTask.State is a pointer like Run.State and
		// may be absent; isSuccess/isFailed dereference it unconditionally.
		if task.State == nil {
			log.Printf("%s task %s has no state information", infoPrefix,
				yellow(task.TaskKey))
			continue
		}

		switch {
		case isSuccess(task):
			log.Printf("%s task %s completed successfully", infoPrefix, green(task.TaskKey))
		case isFailed(task):
			taskInfo, err := w.Jobs.GetRunOutput(ctx, jobs.GetRunOutput{
				RunId: task.RunId,
			})
			if err != nil {
				log.Printf("%s task %s failed. Unable to fetch error trace: %s",
					errorPrefix, red(task.TaskKey), err)
				continue
			}
			log.Printf("%s Task %s failed!\nError:\n%s\nTrace:\n%s", errorPrefix,
				red(task.TaskKey), taskInfo.Error, taskInfo.ErrorTrace)
		default:
			// Neither succeeded nor failed (e.g. still pending, running or
			// skipped): report the raw life cycle state.
			log.Printf("%s task %s is in state %s", infoPrefix,
				yellow(task.TaskKey), task.State.LifeCycleState)
		}
	}
}
func (r *jobRunner) Run(ctx context.Context, opts *Options) error {
jobID, err := strconv.ParseInt(r.job.ID, 10, 64)
if err != nil {
@ -100,6 +151,7 @@ func (r *jobRunner) Run(ctx context.Context, opts *Options) error {
var prefix = fmt.Sprintf("[INFO] [%s]", r.Key())
var prevState *jobs.RunState
var runId *int64
// This function is called each time the function below polls the run status.
update := func(info *retries.Info[jobs.Run]) {
@ -121,6 +173,9 @@ func (r *jobRunner) Run(ctx context.Context, opts *Options) error {
log.Printf("%s Run status: %s", prefix, info.Info.State.LifeCycleState)
prevState = state
}
if runId == nil {
runId = &i.RunId
}
}
req, err := opts.Job.toPayload(jobID)
@ -129,7 +184,12 @@ func (r *jobRunner) Run(ctx context.Context, opts *Options) error {
}
w := r.bundle.WorkspaceClient()
run, err := w.Jobs.RunNowAndWait(ctx, *req, retries.Timeout[jobs.Run](jobRunTimeout), update)
if err != nil && runId != nil {
r.logFailedTasks(ctx, *runId)
}
if err != nil {
return err
}

3
go.mod
View File

@ -18,6 +18,7 @@ require (
)
require (
github.com/fatih/color v1.14.1
github.com/google/uuid v1.3.0
github.com/hashicorp/go-version v1.6.0
github.com/hashicorp/hc-install v0.5.0
@ -29,6 +30,8 @@ require (
require (
github.com/hashicorp/go-cleanhttp v0.5.2 // indirect
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.17 // indirect
github.com/zclconf/go-cty v1.11.0 // indirect
golang.org/x/crypto v0.5.0 // indirect
)

8
go.sum
View File

@ -45,6 +45,8 @@ github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.m
github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98=
github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4=
github.com/fatih/color v1.14.1 h1:qfhVLaG5s+nCROl1zJsZRxFeYrHLqWroPOQ8BWiNb4w=
github.com/fatih/color v1.14.1/go.mod h1:2oHN61fhTpgcxD3TSWCgKDiH1+x4OiDVVGH8WlgGZGg=
github.com/flynn/go-shlex v0.0.0-20150515145356-3f9db97f8568/go.mod h1:xEzjJPgXI435gkrCt3MPfRiAkVrwSbHsst4LCFVfpJc=
github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk=
github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
@ -138,7 +140,12 @@ github.com/manifoldco/promptui v0.9.0 h1:3V4HzJk1TtXW1MTZMP7mdlwbBpIinw3HztaIlYt
github.com/manifoldco/promptui v0.9.0/go.mod h1:ka04sppxSGFAtxX0qhlYQjISsg9mR4GWtQEhdbn6Pgg=
github.com/matryer/is v1.2.0/go.mod h1:2fLPjFQM9rhQ15aVEtbuwhJinnOqrmgXPNdZsdwlWXA=
github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU=
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4=
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
github.com/mattn/go-isatty v0.0.17 h1:BTarxUcIeDqL27Mc+vyvdWYSL28zpIhv3RoTdsLMPng=
github.com/mattn/go-isatty v0.0.17/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
github.com/mitchellh/cli v1.1.5/go.mod h1:v8+iFts2sPIKUV1ltktPXMCC8fumSKFItNcD2cLtRR4=
github.com/mitchellh/copystructure v1.0.0/go.mod h1:SNtv71yrdKgLRyLFxmLdkAbkKEFWgYaq1OVrnRcwhnw=
github.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HKCj9FbZEVFJRxO9s=
@ -259,6 +266,7 @@ golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.0.0-20210616045830-e2b7044e8c71/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.4.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0 h1:MUK/U/4lj1t1oPg0HfuXDN/Z1wv31ZJ/YcPiGccS4DU=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=