2025-02-03 03:18:38 +00:00
|
|
|
package telemetry
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
"encoding/json"
|
2025-03-02 14:25:03 +00:00
|
|
|
"errors"
|
2025-02-03 03:18:38 +00:00
|
|
|
"fmt"
|
|
|
|
"io"
|
|
|
|
"net/http"
|
|
|
|
"os"
|
|
|
|
"time"
|
|
|
|
|
2025-03-05 12:43:46 +00:00
|
|
|
"github.com/databricks/cli/libs/log"
|
2025-02-03 03:18:38 +00:00
|
|
|
"github.com/databricks/cli/libs/telemetry/protos"
|
2025-03-02 14:25:03 +00:00
|
|
|
"github.com/databricks/databricks-sdk-go/apierr"
|
2025-02-03 03:18:38 +00:00
|
|
|
"github.com/databricks/databricks-sdk-go/client"
|
|
|
|
"github.com/databricks/databricks-sdk-go/config"
|
|
|
|
)
|
|
|
|
|
2025-02-04 15:25:59 +00:00
|
|
|
// Names of the environment variables used by the telemetry upload process.
const (
	// UploadLogsFileEnvVar is the environment variable for the file
	// containing output from the upload process.
	UploadLogsFileEnvVar = "DATABRICKS_CLI_TELEMETRY_LOGFILE"

	// PidFileEnvVar is the environment variable for the file containing the
	// PID of the telemetry upload process.
	PidFileEnvVar = "DATABRICKS_CLI_TELEMETRY_PIDFILE"

	// DisableEnvVar is the environment variable to disable telemetry. If this
	// is set to any value, telemetry will be disabled.
	DisableEnvVar = "DATABRICKS_CLI_DISABLE_TELEMETRY"
)
|
2025-02-03 03:18:38 +00:00
|
|
|
|
|
|
|
type UploadConfig struct {
|
|
|
|
Logs []protos.FrontendLog `json:"logs"`
|
|
|
|
}
|
|
|
|
|
2025-03-02 16:31:11 +00:00
|
|
|
// The API requires the logs to be JSON encoded strings. This function reads the
|
|
|
|
// logs from stdin and returns them as a slice of JSON encoded strings.
|
2025-03-02 14:25:03 +00:00
|
|
|
func readLogs(stdin io.Reader) ([]string, error) {
|
|
|
|
b, err := io.ReadAll(stdin)
|
2025-02-03 03:18:38 +00:00
|
|
|
if err != nil {
|
2025-03-02 16:31:11 +00:00
|
|
|
return nil, fmt.Errorf("failed to read from stdin: %s", err)
|
2025-02-03 03:18:38 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
in := UploadConfig{}
|
|
|
|
err = json.Unmarshal(b, &in)
|
|
|
|
if err != nil {
|
2025-03-02 16:31:11 +00:00
|
|
|
return nil, fmt.Errorf("failed to unmarshal input: %s", err)
|
2025-02-03 03:18:38 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if len(in.Logs) == 0 {
|
2025-03-02 16:31:31 +00:00
|
|
|
return nil, errors.New("No logs to upload")
|
2025-02-03 03:18:38 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
protoLogs := make([]string, len(in.Logs))
|
|
|
|
for i, log := range in.Logs {
|
|
|
|
b, err := json.Marshal(log)
|
|
|
|
if err != nil {
|
2025-03-02 16:31:11 +00:00
|
|
|
return nil, fmt.Errorf("failed to marshal log: %s", err)
|
2025-02-03 03:18:38 +00:00
|
|
|
}
|
|
|
|
protoLogs[i] = string(b)
|
|
|
|
}
|
|
|
|
|
2025-03-02 14:25:03 +00:00
|
|
|
return protoLogs, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Upload reads telemetry logs from stdin and uploads them to the telemetry endpoint.
|
|
|
|
// This function is always expected to be called in a separate child process from
|
|
|
|
// the main CLI process.
|
|
|
|
func Upload(ctx context.Context) (*ResponseBody, error) {
|
|
|
|
logs, err := readLogs(os.Stdin)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2025-02-03 03:18:38 +00:00
|
|
|
// Parent process is responsible for setting environment variables to
|
|
|
|
// configure authentication.
|
|
|
|
apiClient, err := client.New(&config.Config{})
|
|
|
|
if err != nil {
|
2025-02-04 15:25:59 +00:00
|
|
|
return nil, fmt.Errorf("Failed to create API client: %s\n", err)
|
2025-02-03 03:18:38 +00:00
|
|
|
}
|
|
|
|
|
2025-03-02 14:25:03 +00:00
|
|
|
var resp *ResponseBody
|
2025-02-03 03:18:38 +00:00
|
|
|
|
2025-03-02 14:25:03 +00:00
|
|
|
// Only try uploading logs for a maximum of 3 times.
|
2025-03-02 16:18:03 +00:00
|
|
|
for i := range 3 {
|
2025-03-02 14:25:03 +00:00
|
|
|
resp, err = attempt(ctx, apiClient, logs)
|
|
|
|
|
|
|
|
// All logs were uploaded successfully.
|
|
|
|
if err == nil && resp.NumProtoSuccess >= int64(len(logs)) {
|
|
|
|
return resp, nil
|
2025-02-03 03:18:38 +00:00
|
|
|
}
|
|
|
|
|
2025-03-02 14:25:03 +00:00
|
|
|
// Partial success. Retry.
|
|
|
|
if err == nil && resp.NumProtoSuccess < int64(len(logs)) {
|
2025-03-05 12:58:14 +00:00
|
|
|
log.Warnf(ctx, "Attempt %d was a partial success. Number of logs uploaded: %d out of %d\n", i+1, resp.NumProtoSuccess, len(logs))
|
2025-03-02 14:25:03 +00:00
|
|
|
time.Sleep(2 * time.Second)
|
|
|
|
continue
|
2025-02-03 03:18:38 +00:00
|
|
|
}
|
|
|
|
|
2025-03-02 14:25:03 +00:00
|
|
|
// We retry for all 5xx responses. We explicitly omit 503 in the predicate here
|
|
|
|
// because it is already automatically retried in the SDK layer.
|
|
|
|
// ref: https://github.com/databricks/databricks-sdk-go/blob/cdb28002afacb8b762348534a4c4040a9f19c24b/apierr/errors.go#L91
|
|
|
|
var apiErr *apierr.APIError
|
|
|
|
if errors.As(err, &apiErr) && apiErr.StatusCode >= 500 && apiErr.StatusCode != 503 {
|
2025-03-05 12:58:14 +00:00
|
|
|
log.Warnf(ctx, "Attempt %d failed due to a server side error. Retrying status code: %d\n", i+1, apiErr.StatusCode)
|
2025-03-02 14:25:03 +00:00
|
|
|
time.Sleep(2 * time.Second)
|
|
|
|
continue
|
2025-02-03 03:18:38 +00:00
|
|
|
}
|
2025-03-02 14:25:03 +00:00
|
|
|
}
|
2025-02-28 10:39:01 +00:00
|
|
|
|
2025-03-02 16:18:03 +00:00
|
|
|
return resp, errors.New("upload did not succeed after three attempts")
|
2025-03-02 14:25:03 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func attempt(ctx context.Context, apiClient *client.DatabricksClient, protoLogs []string) (*ResponseBody, error) {
|
|
|
|
resp := &ResponseBody{}
|
|
|
|
err := apiClient.Do(ctx, http.MethodPost, "/telemetry-ext", nil, nil, RequestBody{
|
|
|
|
UploadTime: time.Now().UnixMilli(),
|
|
|
|
// There is a bug in the `/telemetry-ext` API which requires us to
|
|
|
|
// send an empty array for the `Items` field. Otherwise the API returns
|
|
|
|
// a 500.
|
|
|
|
Items: []string{},
|
|
|
|
ProtoLogs: protoLogs,
|
|
|
|
}, resp)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(resp.Errors) > 0 {
|
|
|
|
return nil, fmt.Errorf("uploading telemetry failed: %v", resp.Errors)
|
2025-02-03 03:18:38 +00:00
|
|
|
}
|
2025-03-01 21:57:03 +00:00
|
|
|
|
2025-03-02 14:25:03 +00:00
|
|
|
return resp, nil
|
2025-02-03 03:18:38 +00:00
|
|
|
}
|