2023-04-14 12:40:34 +00:00
|
|
|
package progress
|
2023-03-31 15:04:12 +00:00
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
"fmt"
|
|
|
|
"strings"
|
|
|
|
|
|
|
|
"github.com/databricks/databricks-sdk-go"
|
|
|
|
"github.com/databricks/databricks-sdk-go/service/pipelines"
|
|
|
|
)
|
|
|
|
|
2023-04-19 20:11:05 +00:00
|
|
|
// The dlt backend computes events for pipeline runs which are accessable through
|
|
|
|
// the 2.0/pipelines/{pipeline_id}/events API
|
|
|
|
//
|
|
|
|
// There are 4 levels for these events: ("ERROR", "WARN", "INFO", "METRICS")
|
|
|
|
//
|
|
|
|
// Here's short introduction to a few important events we display on the console:
|
|
|
|
//
|
|
|
|
// 1. `update_progress`: A state transition occured for the entire pipeline update
|
|
|
|
// 2. `flow_progress`: A state transition occured for a single flow in the pipeine
|
2023-03-31 15:04:12 +00:00
|
|
|
type ProgressEvent pipelines.PipelineEvent
|
|
|
|
|
|
|
|
func (event *ProgressEvent) String() string {
|
|
|
|
result := strings.Builder{}
|
|
|
|
result.WriteString(event.Timestamp + " ")
|
|
|
|
|
2023-04-14 10:21:44 +00:00
|
|
|
// Print event type with some padding to make output more pretty
|
|
|
|
result.WriteString(fmt.Sprintf("%-15s", event.EventType) + " ")
|
|
|
|
|
2023-03-31 15:04:12 +00:00
|
|
|
result.WriteString(event.Level.String() + " ")
|
2023-04-14 10:21:44 +00:00
|
|
|
result.WriteString(fmt.Sprintf(`"%s"`, event.Message))
|
2023-03-31 15:04:12 +00:00
|
|
|
|
2023-04-19 20:11:05 +00:00
|
|
|
// construct error string if level=`Error`
|
|
|
|
if event.Level == pipelines.EventLevelError && event.Error != nil {
|
2023-04-18 13:00:34 +00:00
|
|
|
for _, exception := range event.Error.Exceptions {
|
|
|
|
result.WriteString(fmt.Sprintf("\n%s", exception.Message))
|
|
|
|
}
|
|
|
|
}
|
2023-03-31 15:04:12 +00:00
|
|
|
return result.String()
|
|
|
|
}
|
|
|
|
|
2023-04-18 12:20:35 +00:00
|
|
|
func (event *ProgressEvent) IsInplaceSupported() bool {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2023-03-31 15:04:12 +00:00
|
|
|
// TODO: Add inplace logging to pipelines. https://github.com/databricks/bricks/issues/280
|
|
|
|
type UpdateTracker struct {
|
|
|
|
UpdateId string
|
|
|
|
PipelineId string
|
|
|
|
LatestEventTimestamp string
|
|
|
|
w *databricks.WorkspaceClient
|
|
|
|
}
|
|
|
|
|
|
|
|
func NewUpdateTracker(pipelineId string, updateId string, w *databricks.WorkspaceClient) *UpdateTracker {
|
|
|
|
return &UpdateTracker{
|
|
|
|
w: w,
|
|
|
|
PipelineId: pipelineId,
|
|
|
|
UpdateId: updateId,
|
|
|
|
LatestEventTimestamp: "",
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// To keep the logic simple we do not use pagination. This means that if there are
|
|
|
|
// more than 100 new events since the last query then we will miss out on progress events.
|
|
|
|
//
|
|
|
|
// This is fine because:
|
|
|
|
// 1. This should happen fairly rarely if ever
|
|
|
|
// 2. There is no expectation of the console progress logs being a complete representation
|
|
|
|
//
|
|
|
|
// # If a user needs the complete logs, they can always visit the run URL
|
|
|
|
//
|
|
|
|
// NOTE: Incase we want inplace logging, then we will need to implement pagination
|
|
|
|
func (l *UpdateTracker) Events(ctx context.Context) ([]ProgressEvent, error) {
|
|
|
|
// create filter to fetch only new events
|
2023-04-14 10:21:44 +00:00
|
|
|
filter := fmt.Sprintf(`update_id = '%s'`, l.UpdateId)
|
2023-03-31 15:04:12 +00:00
|
|
|
if l.LatestEventTimestamp != "" {
|
2023-04-14 10:21:44 +00:00
|
|
|
filter = filter + fmt.Sprintf(" AND timestamp > '%s'", l.LatestEventTimestamp)
|
2023-03-31 15:04:12 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// we only check the most recent 100 events for progress
|
|
|
|
response, err := l.w.Pipelines.Impl().ListPipelineEvents(ctx, pipelines.ListPipelineEvents{
|
|
|
|
PipelineId: l.PipelineId,
|
|
|
|
MaxResults: 100,
|
|
|
|
Filter: filter,
|
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
result := make([]ProgressEvent, 0)
|
2023-04-14 10:21:44 +00:00
|
|
|
// we iterate in reverse to return events in chronological order
|
|
|
|
for i := len(response.Events) - 1; i >= 0; i-- {
|
|
|
|
event := response.Events[i]
|
|
|
|
// filter to only include update_progress and flow_progress events
|
2023-03-31 15:04:12 +00:00
|
|
|
if event.EventType == "flow_progress" || event.EventType == "update_progress" {
|
|
|
|
result = append(result, ProgressEvent(event))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// update latest event timestamp for next time
|
|
|
|
if len(result) > 0 {
|
2023-04-14 10:21:44 +00:00
|
|
|
l.LatestEventTimestamp = result[len(result)-1].Timestamp
|
2023-03-31 15:04:12 +00:00
|
|
|
}
|
2023-04-14 10:21:44 +00:00
|
|
|
|
2023-03-31 15:04:12 +00:00
|
|
|
return result, nil
|
|
|
|
}
|