databricks-cli/bundle/run/pipeline.go

218 lines
6.3 KiB
Go
Raw Normal View History

2022-12-15 14:12:47 +00:00
package run
import (
"context"
"fmt"
"strings"
2022-12-15 14:12:47 +00:00
"time"
"github.com/databricks/bricks/bundle"
"github.com/databricks/bricks/bundle/config/resources"
"github.com/databricks/bricks/bundle/run/output"
"github.com/databricks/bricks/bundle/run/progress"
"github.com/databricks/bricks/libs/cmdio"
"github.com/databricks/bricks/libs/log"
2022-12-15 14:12:47 +00:00
"github.com/databricks/databricks-sdk-go/service/pipelines"
flag "github.com/spf13/pflag"
2022-12-15 14:12:47 +00:00
)
Log latest error event on pipeline run fail (#239) DAB config used to test this: bundle.yml ``` workspace: host: <deco-azure-prod> bundle: name: deco-538 resources: pipelines: foo: name: "[${bundle.name}] log pipeline errors" libraries: - notebook: path: ./myNb.py development: true ``` myNb.py ``` # Databricks notebook source print(1/0) ``` Before: ``` 2023/03/09 01:28:44 [INFO] [pipelines.foo] Update available at *** 2023/03/09 01:28:44 [INFO] [pipelines.foo] Update status: CREATED 2023/03/09 01:28:46 [INFO] [pipelines.foo] Update status: INITIALIZING 2023/03/09 01:28:52 [INFO] [pipelines.foo] Update status: FAILED 2023/03/09 01:28:52 [INFO] [pipelines.foo] Update has failed! Error: update failed ``` Now: ``` 2023/03/09 01:29:31 [INFO] [pipelines.foo] Update available at *** 2023/03/09 01:29:31 [INFO] [pipelines.foo] Update status: CREATED 2023/03/09 01:29:33 [INFO] [pipelines.foo] Update status: INITIALIZING 2023/03/09 01:29:40 [INFO] [pipelines.foo] Update status: FAILED 2023/03/09 01:29:40 [INFO] [pipelines.foo] Update has failed! 2023/03/09 01:29:40 [ERROR] [pipelines.foo] Update 27bc77 is FAILED. trace for most recent exception: Failed to execute python command for notebook '/Users/shreyas.goenka@databricks.com/.bundle/deco-538/default/files/myNb' with id RunnableCommandId(9070319781942164851) and error AnsiResult(--------------------------------------------------------------------------- ZeroDivisionError Traceback (most recent call last) <command--1> in <cell line: 1>() ----> 1 print(1/0) ZeroDivisionError: division by zero,Map(),Map(),List(),List(),Map()) Error: update failed ```
2023-03-16 11:23:46 +00:00
func filterEventsByUpdateId(events []pipelines.PipelineEvent, updateId string) []pipelines.PipelineEvent {
result := []pipelines.PipelineEvent{}
for i := 0; i < len(events); i++ {
if events[i].Origin.UpdateId == updateId {
result = append(result, events[i])
}
}
return result
}
func (r *pipelineRunner) logEvent(ctx context.Context, event pipelines.PipelineEvent) {
logString := ""
Log latest error event on pipeline run fail (#239) DAB config used to test this: bundle.yml ``` workspace: host: <deco-azure-prod> bundle: name: deco-538 resources: pipelines: foo: name: "[${bundle.name}] log pipeline errors" libraries: - notebook: path: ./myNb.py development: true ``` myNb.py ``` # Databricks notebook source print(1/0) ``` Before: ``` 2023/03/09 01:28:44 [INFO] [pipelines.foo] Update available at *** 2023/03/09 01:28:44 [INFO] [pipelines.foo] Update status: CREATED 2023/03/09 01:28:46 [INFO] [pipelines.foo] Update status: INITIALIZING 2023/03/09 01:28:52 [INFO] [pipelines.foo] Update status: FAILED 2023/03/09 01:28:52 [INFO] [pipelines.foo] Update has failed! Error: update failed ``` Now: ``` 2023/03/09 01:29:31 [INFO] [pipelines.foo] Update available at *** 2023/03/09 01:29:31 [INFO] [pipelines.foo] Update status: CREATED 2023/03/09 01:29:33 [INFO] [pipelines.foo] Update status: INITIALIZING 2023/03/09 01:29:40 [INFO] [pipelines.foo] Update status: FAILED 2023/03/09 01:29:40 [INFO] [pipelines.foo] Update has failed! 2023/03/09 01:29:40 [ERROR] [pipelines.foo] Update 27bc77 is FAILED. trace for most recent exception: Failed to execute python command for notebook '/Users/shreyas.goenka@databricks.com/.bundle/deco-538/default/files/myNb' with id RunnableCommandId(9070319781942164851) and error AnsiResult(--------------------------------------------------------------------------- ZeroDivisionError Traceback (most recent call last) <command--1> in <cell line: 1>() ----> 1 print(1/0) ZeroDivisionError: division by zero,Map(),Map(),List(),List(),Map()) Error: update failed ```
2023-03-16 11:23:46 +00:00
if event.Message != "" {
logString += fmt.Sprintf(" %s\n", event.Message)
}
if event.Error != nil && len(event.Error.Exceptions) > 0 {
logString += "trace for most recent exception: \n"
for i := 0; i < len(event.Error.Exceptions); i++ {
logString += fmt.Sprintf("%s\n", event.Error.Exceptions[i].Message)
}
}
if logString != "" {
log.Errorf(ctx, fmt.Sprintf("[%s] %s", event.EventType, logString))
Log latest error event on pipeline run fail (#239) DAB config used to test this: bundle.yml ``` workspace: host: <deco-azure-prod> bundle: name: deco-538 resources: pipelines: foo: name: "[${bundle.name}] log pipeline errors" libraries: - notebook: path: ./myNb.py development: true ``` myNb.py ``` # Databricks notebook source print(1/0) ``` Before: ``` 2023/03/09 01:28:44 [INFO] [pipelines.foo] Update available at *** 2023/03/09 01:28:44 [INFO] [pipelines.foo] Update status: CREATED 2023/03/09 01:28:46 [INFO] [pipelines.foo] Update status: INITIALIZING 2023/03/09 01:28:52 [INFO] [pipelines.foo] Update status: FAILED 2023/03/09 01:28:52 [INFO] [pipelines.foo] Update has failed! Error: update failed ``` Now: ``` 2023/03/09 01:29:31 [INFO] [pipelines.foo] Update available at *** 2023/03/09 01:29:31 [INFO] [pipelines.foo] Update status: CREATED 2023/03/09 01:29:33 [INFO] [pipelines.foo] Update status: INITIALIZING 2023/03/09 01:29:40 [INFO] [pipelines.foo] Update status: FAILED 2023/03/09 01:29:40 [INFO] [pipelines.foo] Update has failed! 2023/03/09 01:29:40 [ERROR] [pipelines.foo] Update 27bc77 is FAILED. trace for most recent exception: Failed to execute python command for notebook '/Users/shreyas.goenka@databricks.com/.bundle/deco-538/default/files/myNb' with id RunnableCommandId(9070319781942164851) and error AnsiResult(--------------------------------------------------------------------------- ZeroDivisionError Traceback (most recent call last) <command--1> in <cell line: 1>() ----> 1 print(1/0) ZeroDivisionError: division by zero,Map(),Map(),List(),List(),Map()) Error: update failed ```
2023-03-16 11:23:46 +00:00
}
}
func (r *pipelineRunner) logErrorEvent(ctx context.Context, pipelineId string, updateId string) error {
w := r.bundle.WorkspaceClient()
// Note: For a 100 percent correct and complete solution we should use the
// w.Pipelines.ListPipelineEventsAll method to find all relevant events. However the
// probablity of the relevant last error event not being present in the most
// recent 100 error events is very close to 0 and the first 100 error events
// should give us a good picture of the error.
//
// Otherwise for long lived pipelines, there can be a lot of unnecessary
// latency due to multiple pagination API calls needed underneath the hood for
// ListPipelineEventsAll
res, err := w.Pipelines.Impl().ListPipelineEvents(ctx, pipelines.ListPipelineEvents{
Filter: `level='ERROR'`,
MaxResults: 100,
PipelineId: pipelineId,
})
if err != nil {
return err
}
updateEvents := filterEventsByUpdateId(res.Events, updateId)
// The events API returns most recent events first. We iterate in a reverse order
// to print the events chronologically
for i := len(updateEvents) - 1; i >= 0; i-- {
r.logEvent(ctx, updateEvents[i])
Log latest error event on pipeline run fail (#239) DAB config used to test this: bundle.yml ``` workspace: host: <deco-azure-prod> bundle: name: deco-538 resources: pipelines: foo: name: "[${bundle.name}] log pipeline errors" libraries: - notebook: path: ./myNb.py development: true ``` myNb.py ``` # Databricks notebook source print(1/0) ``` Before: ``` 2023/03/09 01:28:44 [INFO] [pipelines.foo] Update available at *** 2023/03/09 01:28:44 [INFO] [pipelines.foo] Update status: CREATED 2023/03/09 01:28:46 [INFO] [pipelines.foo] Update status: INITIALIZING 2023/03/09 01:28:52 [INFO] [pipelines.foo] Update status: FAILED 2023/03/09 01:28:52 [INFO] [pipelines.foo] Update has failed! Error: update failed ``` Now: ``` 2023/03/09 01:29:31 [INFO] [pipelines.foo] Update available at *** 2023/03/09 01:29:31 [INFO] [pipelines.foo] Update status: CREATED 2023/03/09 01:29:33 [INFO] [pipelines.foo] Update status: INITIALIZING 2023/03/09 01:29:40 [INFO] [pipelines.foo] Update status: FAILED 2023/03/09 01:29:40 [INFO] [pipelines.foo] Update has failed! 2023/03/09 01:29:40 [ERROR] [pipelines.foo] Update 27bc77 is FAILED. trace for most recent exception: Failed to execute python command for notebook '/Users/shreyas.goenka@databricks.com/.bundle/deco-538/default/files/myNb' with id RunnableCommandId(9070319781942164851) and error AnsiResult(--------------------------------------------------------------------------- ZeroDivisionError Traceback (most recent call last) <command--1> in <cell line: 1>() ----> 1 print(1/0) ZeroDivisionError: division by zero,Map(),Map(),List(),List(),Map()) Error: update failed ```
2023-03-16 11:23:46 +00:00
}
return nil
}
// PipelineOptions defines options for running a pipeline update.
type PipelineOptions struct {
// Perform a full graph update.
RefreshAll bool
// List of tables to update.
Refresh []string
// Perform a full graph reset and recompute.
FullRefreshAll bool
// List of tables to reset and recompute.
FullRefresh []string
}
func (o *PipelineOptions) Define(fs *flag.FlagSet) {
fs.BoolVar(&o.RefreshAll, "refresh-all", false, "Perform a full graph update.")
fs.StringSliceVar(&o.Refresh, "refresh", nil, "List of tables to update.")
fs.BoolVar(&o.FullRefreshAll, "full-refresh-all", false, "Perform a full graph reset and recompute.")
fs.StringSliceVar(&o.FullRefresh, "full-refresh", nil, "List of tables to reset and recompute.")
}
// Validate returns if the combination of options is valid.
func (o *PipelineOptions) Validate() error {
set := []string{}
if o.RefreshAll {
set = append(set, "--refresh-all")
}
if len(o.Refresh) > 0 {
set = append(set, "--refresh")
}
if o.FullRefreshAll {
set = append(set, "--full-refresh-all")
}
if len(o.FullRefresh) > 0 {
set = append(set, "--full-refresh")
}
if len(set) > 1 {
return fmt.Errorf("pipeline run arguments are mutually exclusive (got %s)", strings.Join(set, ", "))
}
return nil
}
func (o *PipelineOptions) toPayload(pipelineID string) (*pipelines.StartUpdate, error) {
if err := o.Validate(); err != nil {
return nil, err
}
payload := &pipelines.StartUpdate{
PipelineId: pipelineID,
// Note: `RefreshAll` is implied if the fields below are not set.
RefreshSelection: o.Refresh,
FullRefresh: o.FullRefreshAll,
FullRefreshSelection: o.FullRefresh,
}
return payload, nil
}
2022-12-15 14:12:47 +00:00
type pipelineRunner struct {
key
bundle *bundle.Bundle
pipeline *resources.Pipeline
}
func (r *pipelineRunner) Run(ctx context.Context, opts *Options) (output.RunOutput, error) {
2022-12-15 14:12:47 +00:00
var pipelineID = r.pipeline.ID
// Include resource key in logger.
ctx = log.NewContext(ctx, log.GetLogger(ctx).With("resource", r.Key()))
2022-12-15 14:12:47 +00:00
w := r.bundle.WorkspaceClient()
2022-12-22 08:46:17 +00:00
_, err := w.Pipelines.GetByPipelineId(ctx, pipelineID)
2022-12-15 14:12:47 +00:00
if err != nil {
log.Warnf(ctx, "Cannot get pipeline: %s", err)
return nil, err
2022-12-15 14:12:47 +00:00
}
req, err := opts.Pipeline.toPayload(pipelineID)
if err != nil {
return nil, err
}
res, err := w.Pipelines.StartUpdate(ctx, *req)
2022-12-15 14:12:47 +00:00
if err != nil {
return nil, err
2022-12-15 14:12:47 +00:00
}
updateID := res.UpdateId
// setup progress logger and tracker to query events
updateTracker := progress.NewUpdateTracker(pipelineID, updateID, w)
progressLogger, ok := cmdio.FromContext(ctx)
if !ok {
return nil, fmt.Errorf("no progress logger found")
}
2022-12-15 14:12:47 +00:00
// Log the pipeline update URL as soon as it is available.
progressLogger.Log(progress.NewPipelineUpdateUrlEvent(w.Config.Host, updateID, pipelineID))
2022-12-15 14:12:47 +00:00
// Poll update for completion and post status.
// Note: there is no "StartUpdateAndWait" wrapper for this API.
var prevState *pipelines.UpdateInfoState
for {
events, err := updateTracker.Events(ctx)
if err != nil {
return nil, err
}
for _, event := range events {
progressLogger.Log(&event)
log.Infof(ctx, event.String())
}
2022-12-15 14:12:47 +00:00
update, err := w.Pipelines.GetUpdateByPipelineIdAndUpdateId(ctx, pipelineID, updateID)
if err != nil {
return nil, err
2022-12-15 14:12:47 +00:00
}
// Log only if the current state is different from the previous state.
state := update.Update.State
if prevState == nil || *prevState != state {
log.Infof(ctx, "Update status: %s", state)
2022-12-15 14:12:47 +00:00
prevState = &state
}
if state == pipelines.UpdateInfoStateCanceled {
log.Infof(ctx, "Update was cancelled!")
return nil, fmt.Errorf("update cancelled")
2022-12-15 14:12:47 +00:00
}
if state == pipelines.UpdateInfoStateFailed {
log.Infof(ctx, "Update has failed!")
Log latest error event on pipeline run fail (#239) DAB config used to test this: bundle.yml ``` workspace: host: <deco-azure-prod> bundle: name: deco-538 resources: pipelines: foo: name: "[${bundle.name}] log pipeline errors" libraries: - notebook: path: ./myNb.py development: true ``` myNb.py ``` # Databricks notebook source print(1/0) ``` Before: ``` 2023/03/09 01:28:44 [INFO] [pipelines.foo] Update available at *** 2023/03/09 01:28:44 [INFO] [pipelines.foo] Update status: CREATED 2023/03/09 01:28:46 [INFO] [pipelines.foo] Update status: INITIALIZING 2023/03/09 01:28:52 [INFO] [pipelines.foo] Update status: FAILED 2023/03/09 01:28:52 [INFO] [pipelines.foo] Update has failed! Error: update failed ``` Now: ``` 2023/03/09 01:29:31 [INFO] [pipelines.foo] Update available at *** 2023/03/09 01:29:31 [INFO] [pipelines.foo] Update status: CREATED 2023/03/09 01:29:33 [INFO] [pipelines.foo] Update status: INITIALIZING 2023/03/09 01:29:40 [INFO] [pipelines.foo] Update status: FAILED 2023/03/09 01:29:40 [INFO] [pipelines.foo] Update has failed! 2023/03/09 01:29:40 [ERROR] [pipelines.foo] Update 27bc77 is FAILED. trace for most recent exception: Failed to execute python command for notebook '/Users/shreyas.goenka@databricks.com/.bundle/deco-538/default/files/myNb' with id RunnableCommandId(9070319781942164851) and error AnsiResult(--------------------------------------------------------------------------- ZeroDivisionError Traceback (most recent call last) <command--1> in <cell line: 1>() ----> 1 print(1/0) ZeroDivisionError: division by zero,Map(),Map(),List(),List(),Map()) Error: update failed ```
2023-03-16 11:23:46 +00:00
err := r.logErrorEvent(ctx, pipelineID, updateID)
if err != nil {
return nil, err
Log latest error event on pipeline run fail (#239) DAB config used to test this: bundle.yml ``` workspace: host: <deco-azure-prod> bundle: name: deco-538 resources: pipelines: foo: name: "[${bundle.name}] log pipeline errors" libraries: - notebook: path: ./myNb.py development: true ``` myNb.py ``` # Databricks notebook source print(1/0) ``` Before: ``` 2023/03/09 01:28:44 [INFO] [pipelines.foo] Update available at *** 2023/03/09 01:28:44 [INFO] [pipelines.foo] Update status: CREATED 2023/03/09 01:28:46 [INFO] [pipelines.foo] Update status: INITIALIZING 2023/03/09 01:28:52 [INFO] [pipelines.foo] Update status: FAILED 2023/03/09 01:28:52 [INFO] [pipelines.foo] Update has failed! Error: update failed ``` Now: ``` 2023/03/09 01:29:31 [INFO] [pipelines.foo] Update available at *** 2023/03/09 01:29:31 [INFO] [pipelines.foo] Update status: CREATED 2023/03/09 01:29:33 [INFO] [pipelines.foo] Update status: INITIALIZING 2023/03/09 01:29:40 [INFO] [pipelines.foo] Update status: FAILED 2023/03/09 01:29:40 [INFO] [pipelines.foo] Update has failed! 2023/03/09 01:29:40 [ERROR] [pipelines.foo] Update 27bc77 is FAILED. trace for most recent exception: Failed to execute python command for notebook '/Users/shreyas.goenka@databricks.com/.bundle/deco-538/default/files/myNb' with id RunnableCommandId(9070319781942164851) and error AnsiResult(--------------------------------------------------------------------------- ZeroDivisionError Traceback (most recent call last) <command--1> in <cell line: 1>() ----> 1 print(1/0) ZeroDivisionError: division by zero,Map(),Map(),List(),List(),Map()) Error: update failed ```
2023-03-16 11:23:46 +00:00
}
return nil, fmt.Errorf("update failed")
2022-12-15 14:12:47 +00:00
}
if state == pipelines.UpdateInfoStateCompleted {
log.Infof(ctx, "Update has completed successfully!")
return nil, nil
2022-12-15 14:12:47 +00:00
}
time.Sleep(time.Second)
}
}