From aa4e231731625d51b894e32193d46622b71d3d27 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 6 Mar 2025 14:31:13 +0100 Subject: [PATCH] update acceptance tests --- acceptance/bundle/debug/out.stderr.txt | 1 - acceptance/telemetry/failure/output.txt | 11 ++++------- acceptance/telemetry/partial-success/output.txt | 11 ++++------- acceptance/telemetry/timeout/output.txt | 3 ++- cmd/root/root.go | 3 ++- libs/telemetry/logger.go | 11 +++++++++-- 6 files changed, 21 insertions(+), 19 deletions(-) diff --git a/acceptance/bundle/debug/out.stderr.txt b/acceptance/bundle/debug/out.stderr.txt index 631f51990..b9ac5c4d9 100644 --- a/acceptance/bundle/debug/out.stderr.txt +++ b/acceptance/bundle/debug/out.stderr.txt @@ -99,4 +99,3 @@ < } pid=12345 mutator=validate:files_to_sync sdk=true 10:07:59 Debug: Path /Workspace/Users/[USERNAME]/.bundle/debug/default/files has type directory (ID: 0) pid=12345 mutator=validate:files_to_sync 10:07:59 Info: completed execution pid=12345 exit_code=0 -10:07:59 Debug: no telemetry logs to upload pid=12345 diff --git a/acceptance/telemetry/failure/output.txt b/acceptance/telemetry/failure/output.txt index fc7e022b7..8bdd695d6 100644 --- a/acceptance/telemetry/failure/output.txt +++ b/acceptance/telemetry/failure/output.txt @@ -17,8 +17,7 @@ HH:MM:SS Debug: POST /telemetry-ext < "message": "Endpoint not implemented." < } pid=PID sdk=true HH:MM:SS Debug: non-retriable error: Endpoint not implemented. pid=PID sdk=true -HH:MM:SS Debug: Attempt 1 failed due to a server side error. Retrying status code: 501 - pid=PID +HH:MM:SS Debug: Attempt 1 failed due to a server side error. Retrying status code: 501 pid=PID HH:MM:SS Debug: POST /telemetry-ext > { > "items": null, @@ -34,8 +33,7 @@ HH:MM:SS Debug: POST /telemetry-ext < "message": "Endpoint not implemented." < } pid=PID sdk=true HH:MM:SS Debug: non-retriable error: Endpoint not implemented. pid=PID sdk=true -HH:MM:SS Debug: Attempt 2 failed due to a server side error. Retrying status code: 501 - pid=PID +HH:MM:SS Debug: Attempt 2 failed due to a server side error. Retrying status code: 501 pid=PID HH:MM:SS Debug: POST /telemetry-ext > { > "items": null, @@ -51,6 +49,5 @@ HH:MM:SS Debug: POST /telemetry-ext < "message": "Endpoint not implemented." < } pid=PID sdk=true HH:MM:SS Debug: non-retriable error: Endpoint not implemented. pid=PID sdk=true -HH:MM:SS Debug: Attempt 3 failed due to a server side error. Retrying status code: 501 - pid=PID -HH:MM:SS Debug: failed to upload telemetry: failed to upload telemetry logs after three attempts pid=PID +HH:MM:SS Debug: Attempt 3 failed due to a server side error. Retrying status code: 501 pid=PID +HH:MM:SS Debug: telemetry upload failed: failed to upload telemetry logs after three attempts pid=PID diff --git a/acceptance/telemetry/partial-success/output.txt b/acceptance/telemetry/partial-success/output.txt index a03d95a93..f1de5358c 100644 --- a/acceptance/telemetry/partial-success/output.txt +++ b/acceptance/telemetry/partial-success/output.txt @@ -16,8 +16,7 @@ HH:MM:SS Debug: POST /telemetry-ext < "errors": null, < "numProtoSuccess": 1 < } pid=PID sdk=true -HH:MM:SS Debug: Attempt 1 was a partial success. Number of logs uploaded: 1 out of 2 - pid=PID +HH:MM:SS Debug: Attempt 1 was a partial success. Number of logs uploaded: 1 out of 2 pid=PID HH:MM:SS Debug: POST /telemetry-ext > { > "items": null, @@ -32,8 +31,7 @@ HH:MM:SS Debug: POST /telemetry-ext < "errors": null, < "numProtoSuccess": 1 < } pid=PID sdk=true -HH:MM:SS Debug: Attempt 2 was a partial success. Number of logs uploaded: 1 out of 2 - pid=PID +HH:MM:SS Debug: Attempt 2 was a partial success. Number of logs uploaded: 1 out of 2 pid=PID HH:MM:SS Debug: POST /telemetry-ext > { > "items": null, @@ -48,6 +46,5 @@ HH:MM:SS Debug: POST /telemetry-ext < "errors": null, < "numProtoSuccess": 1 < } pid=PID sdk=true -HH:MM:SS Debug: Attempt 3 was a partial success. Number of logs uploaded: 1 out of 2 - pid=PID -HH:MM:SS Debug: failed to upload telemetry: failed to upload telemetry logs after three attempts pid=PID +HH:MM:SS Debug: Attempt 3 was a partial success. Number of logs uploaded: 1 out of 2 pid=PID +HH:MM:SS Debug: telemetry upload failed: failed to upload telemetry logs after three attempts pid=PID diff --git a/acceptance/telemetry/timeout/output.txt b/acceptance/telemetry/timeout/output.txt index 485773c93..61fa32f40 100644 --- a/acceptance/telemetry/timeout/output.txt +++ b/acceptance/telemetry/timeout/output.txt @@ -13,4 +13,5 @@ HH:MM:SS Debug: POST /telemetry-ext > } < Error: Post "[DATABRICKS_URL]/telemetry-ext": context deadline exceeded pid=PID sdk=true HH:MM:SS Debug: non-retriable error: Post "[DATABRICKS_URL]/telemetry-ext": context deadline exceeded pid=PID sdk=true -HH:MM:SS Debug: failed to upload telemetry: uploading telemetry logs timed out: context deadline exceeded pid=PID +HH:MM:SS Debug: Attempt 1 failed due to a timeout. Will not retry pid=PID +HH:MM:SS Debug: telemetry upload failed: uploading telemetry logs timed out: Post "[DATABRICKS_URL]/telemetry-ext": context deadline exceeded pid=PID diff --git a/cmd/root/root.go b/cmd/root/root.go index 8e4b1b270..22d18092b 100644 --- a/cmd/root/root.go +++ b/cmd/root/root.go @@ -166,6 +166,7 @@ Stack Trace: exitCode = 1 } + ctx = cmd.Context() if telemetry.HasLogs(ctx) { err := telemetry.Upload(ctx, ConfigUsed(ctx), protos.ExecutionContext{ CmdExecID: cmdExecId, @@ -177,7 +178,7 @@ Stack Trace: ExitCode: int64(exitCode), }) if err != nil { - log.Debugf(ctx, "failed to upload telemetry logs: %s", err) + log.Debugf(ctx, "telemetry upload failed: %s", err) } } diff --git a/libs/telemetry/logger.go b/libs/telemetry/logger.go index df1c969df..50d2515b7 100644 --- a/libs/telemetry/logger.go +++ b/libs/telemetry/logger.go @@ -104,11 +104,18 @@ func Upload(ctx context.Context, cfg *config.Config, ec protos.ExecutionContext) // Partial success. Retry. if err == nil && resp.NumProtoSuccess < int64(len(protoLogs)) { - log.Debugf(ctx, "Attempt %d was a partial success. Number of logs uploaded: %d out of %d\n", i+1, resp.NumProtoSuccess, len(protoLogs)) + log.Debugf(ctx, "Attempt %d was a partial success. Number of logs uploaded: %d out of %d", i+1, resp.NumProtoSuccess, len(protoLogs)) time.Sleep(waitBetweenRetries) continue } + // Do not retry if the context deadline was exceeded. This means that our + // timeout of three seconds was triggered and we should not try again. + if errors.Is(err, context.DeadlineExceeded) { + log.Debugf(ctx, "Attempt %d failed due to a timeout. Will not retry", i+1) + return fmt.Errorf("uploading telemetry logs timed out: %w", err) + } + // We retry for all 5xx responses. Note that the SDK only retries for 503 and 429 // (as of 6th March 2025) so we need some additional logic here to retry for other // 5xx responses. @@ -118,7 +125,7 @@ func Upload(ctx context.Context, cfg *config.Config, ec protos.ExecutionContext) // all 5xx responses. var apiErr *apierr.APIError if errors.As(err, &apiErr) && apiErr.StatusCode >= 500 { - log.Debugf(ctx, "Attempt %d failed due to a server side error. Retrying status code: %d\n", i+1, apiErr.StatusCode) + log.Debugf(ctx, "Attempt %d failed due to a server side error. Retrying status code: %d", i+1, apiErr.StatusCode) time.Sleep(waitBetweenRetries) continue }