// Source: databricks-cli/libs/telemetry/upload.go (Go, 134 lines, 3.7 KiB), captured from a git blame view.
2025-02-03 03:18:38 +00:00
package telemetry
import (
"context"
"encoding/json"
2025-03-02 14:25:03 +00:00
"errors"
2025-02-03 03:18:38 +00:00
"fmt"
"io"
"net/http"
"os"
"time"
"github.com/databricks/cli/libs/telemetry/protos"
2025-03-02 14:25:03 +00:00
"github.com/databricks/databricks-sdk-go/apierr"
2025-02-03 03:18:38 +00:00
"github.com/databricks/databricks-sdk-go/client"
"github.com/databricks/databricks-sdk-go/config"
)
2025-02-04 15:25:59 +00:00
// Names of the environment variables that configure the telemetry upload
// process.
const (
	// UploadLogsFileEnvVar names the file containing output from the upload
	// process.
	UploadLogsFileEnvVar = "DATABRICKS_CLI_TELEMETRY_UPLOAD_LOGS_FILE"

	// PidFileEnvVar names the file containing the PID of the telemetry upload
	// process.
	PidFileEnvVar = "DATABRICKS_CLI_TELEMETRY_PID_FILE"

	// DisableEnvVar is the environment variable to disable telemetry. If this
	// is set to any value, telemetry will be disabled.
	DisableEnvVar = "DATABRICKS_CLI_DISABLE_TELEMETRY"

	// UploadTimeoutEnvVar sets the max time to try and upload the telemetry
	// logs. Useful for testing.
	// TODO: Remove the test case for this.
	UploadTimeoutEnvVar = "DATABRICKS_CLI_TELEMETRY_UPLOAD_TIMEOUT"
)
2025-02-03 03:18:38 +00:00
// UploadConfig is the JSON document that readLogs decodes from its input.
type UploadConfig struct {
// Logs holds the telemetry log entries, stored under the "logs" JSON key.
Logs []protos.FrontendLog `json:"logs"`
}
2025-03-02 14:25:03 +00:00
func readLogs(stdin io.Reader) ([]string, error) {
b, err := io.ReadAll(stdin)
2025-02-03 03:18:38 +00:00
if err != nil {
2025-02-04 15:25:59 +00:00
return nil, fmt.Errorf("failed to read from stdin: %s\n", err)
2025-02-03 03:18:38 +00:00
}
in := UploadConfig{}
err = json.Unmarshal(b, &in)
if err != nil {
2025-02-04 15:25:59 +00:00
return nil, fmt.Errorf("failed to unmarshal input: %s\n", err)
2025-02-03 03:18:38 +00:00
}
if len(in.Logs) == 0 {
2025-02-04 15:25:59 +00:00
return nil, fmt.Errorf("No logs to upload: %s\n", err)
2025-02-03 03:18:38 +00:00
}
protoLogs := make([]string, len(in.Logs))
for i, log := range in.Logs {
b, err := json.Marshal(log)
if err != nil {
2025-02-04 15:25:59 +00:00
return nil, fmt.Errorf("failed to marshal log: %s\n", err)
2025-02-03 03:18:38 +00:00
}
protoLogs[i] = string(b)
}
2025-03-02 14:25:03 +00:00
return protoLogs, nil
}
// Upload reads telemetry logs from stdin and uploads them to the telemetry endpoint.
// This function is always expected to be called in a separate child process from
// the main CLI process.
func Upload(ctx context.Context) (*ResponseBody, error) {
logs, err := readLogs(os.Stdin)
if err != nil {
return nil, err
}
2025-02-03 03:18:38 +00:00
// Parent process is responsible for setting environment variables to
// configure authentication.
apiClient, err := client.New(&config.Config{})
if err != nil {
2025-02-04 15:25:59 +00:00
return nil, fmt.Errorf("Failed to create API client: %s\n", err)
2025-02-03 03:18:38 +00:00
}
2025-03-02 14:25:03 +00:00
var resp *ResponseBody
2025-02-03 03:18:38 +00:00
2025-03-02 14:25:03 +00:00
// Only try uploading logs for a maximum of 3 times.
for range 3 {
2025-03-02 14:25:03 +00:00
// TODO: Confirm that the timeout of a request here is indeed one minute.
resp, err = attempt(ctx, apiClient, logs)
// All logs were uploaded successfully.
if err == nil && resp.NumProtoSuccess >= int64(len(logs)) {
return resp, nil
2025-02-03 03:18:38 +00:00
}
2025-03-02 14:25:03 +00:00
// Partial success. Retry.
if err == nil && resp.NumProtoSuccess < int64(len(logs)) {
time.Sleep(2 * time.Second)
continue
2025-02-03 03:18:38 +00:00
}
2025-03-02 14:25:03 +00:00
// We retry for all 5xx responses. We explicitly omit 503 in the predicate here
// because it is already automatically retried in the SDK layer.
// ref: https://github.com/databricks/databricks-sdk-go/blob/cdb28002afacb8b762348534a4c4040a9f19c24b/apierr/errors.go#L91
var apiErr *apierr.APIError
if errors.As(err, &apiErr) && apiErr.StatusCode >= 500 && apiErr.StatusCode != 503 {
time.Sleep(2 * time.Second)
continue
2025-02-03 03:18:38 +00:00
}
2025-03-02 14:25:03 +00:00
}
2025-02-28 10:39:01 +00:00
2025-03-02 14:25:03 +00:00
return resp, fmt.Errorf("upload did not succeed after three attempts. err: %#v. response body: %#v", err, resp)
}
func attempt(ctx context.Context, apiClient *client.DatabricksClient, protoLogs []string) (*ResponseBody, error) {
resp := &ResponseBody{}
err := apiClient.Do(ctx, http.MethodPost, "/telemetry-ext", nil, nil, RequestBody{
UploadTime: time.Now().UnixMilli(),
// There is a bug in the `/telemetry-ext` API which requires us to
// send an empty array for the `Items` field. Otherwise the API returns
// a 500.
Items: []string{},
ProtoLogs: protoLogs,
}, resp)
if err != nil {
return nil, err
}
if len(resp.Errors) > 0 {
return nil, fmt.Errorf("uploading telemetry failed: %v", resp.Errors)
2025-02-03 03:18:38 +00:00
}
2025-03-02 14:25:03 +00:00
return resp, nil
2025-02-03 03:18:38 +00:00
}