Add protos for bundle telemetry

This commit is contained in:
Shreyas Goenka 2025-01-22 14:40:12 +01:00
parent 667302b61b
commit 535d5563e8
No known key found for this signature in database
GPG Key ID: 92A07DF49CCB0622
7 changed files with 226 additions and 0 deletions

19
libs/telemetry/api.go Normal file
View File

@ -0,0 +1,19 @@
package telemetry
// RequestBody models the JSON payload sent to the /telemetry-ext API endpoint.
type RequestBody struct {
	// UploadTime is serialized under the "uploadTime" key.
	// NOTE(review): presumably a Unix timestamp; the unit is not visible here — confirm.
	UploadTime int64 `json:"uploadTime"`

	// Items is serialized under the "items" key. A nil slice encodes as JSON null
	// because the tag carries no omitempty.
	Items []string `json:"items"`

	// ProtoLogs is serialized under the "protoLogs" key.
	// NOTE(review): presumably each element is one serialized log entry — confirm.
	ProtoLogs []string `json:"protoLogs"`
}
// ResponseBody models the JSON payload returned by the /telemetry-ext API endpoint.
type ResponseBody struct {
	// Errors is decoded from the "errors" key; each element is a LogError.
	Errors []LogError `json:"errors"`

	// NumProtoSuccess is decoded from the "numProtoSuccess" key.
	// NOTE(review): presumably the number of proto logs the server accepted — confirm.
	NumProtoSuccess int64 `json:"numProtoSuccess"`
}
// LogError is one element of ResponseBody.Errors, as returned by the
// /telemetry-ext API endpoint.
type LogError struct {
	// Message is decoded from the "message" key.
	Message string `json:"message"`

	// ErrorType uses the camelCase key "errorType", matching the casing of every
	// other key in these API bindings. Decoding is unaffected by key casing
	// because encoding/json matches object keys case-insensitively; only the
	// marshaled key name differs from the previous "ErrorType".
	ErrorType string `json:"errorType"`
}

View File

@ -0,0 +1,2 @@
The types in this package are equivalent to the lumberjack protos defined in Universe.
You can find all lumberjack protos for the Databricks CLI in the `proto/logs/frontend/databricks_cli` directory.

View File

@ -0,0 +1,77 @@
package protos
// BundleDeployEvent is the telemetry payload describing a bundle deployment.
// Every field carries omitempty, so zero values and empty slices are left out
// of the encoded JSON.
type BundleDeployEvent struct {
	// UUID of the bundle itself, as set in the `bundle.uuid` field of the
	// bundle configuration.
	BundleUuid string `json:"bundle_uuid,omitempty"`

	// Resource counts.
	// NOTE(review): presumably ResourceCount is the total and the remaining
	// fields are per-resource-type counts — confirm.
	ResourceCount                     int64 `json:"resource_count,omitempty"`
	ResourceJobCount                  int64 `json:"resource_job_count,omitempty"`
	ResourcePipelineCount             int64 `json:"resource_pipeline_count,omitempty"`
	ResourceModelCount                int64 `json:"resource_model_count,omitempty"`
	ResourceExperimentCount           int64 `json:"resource_experiment_count,omitempty"`
	ResourceModelServingEndpointCount int64 `json:"resource_model_serving_endpoint_count,omitempty"`
	ResourceRegisteredModelCount      int64 `json:"resource_registered_model_count,omitempty"`
	ResourceQualityMonitorCount       int64 `json:"resource_quality_monitor_count,omitempty"`
	ResourceSchemaCount               int64 `json:"resource_schema_count,omitempty"`
	ResourceVolumeCount               int64 `json:"resource_volume_count,omitempty"`
	ResourceClusterCount              int64 `json:"resource_cluster_count,omitempty"`
	ResourceDashboardCount            int64 `json:"resource_dashboard_count,omitempty"`
	ResourceAppCount                  int64 `json:"resource_app_count,omitempty"`

	// IDs of resources managed by the bundle. Resources such as volumes or
	// schemas expose no numerical or UUID identifier and are tracked by name;
	// they are deliberately absent here because names are PII.
	ResourceJobIds       []string `json:"resource_job_ids,omitempty"`
	ResourcePipelineIds  []string `json:"resource_pipeline_ids,omitempty"`
	ResourceClusterIds   []string `json:"resource_cluster_ids,omitempty"`
	ResourceDashboardIds []string `json:"resource_dashboard_ids,omitempty"`

	// Experimental holds the optional BundleDeployExperimental metrics; a nil
	// pointer is omitted from the encoded JSON.
	Experimental *BundleDeployExperimental `json:"experimental,omitempty"`
}
// BundleDeployExperimental groups experimental deployment metrics. They are
// often added in an ad hoc manner, come with no guarantees, and may be removed
// in the future without any notice.
type BundleDeployExperimental struct {
	// Number of YAML (or JSON) configuration files in the bundle.
	ConfigurationFileCount int64 `json:"configuration_file_count,omitempty"`

	// Size, in bytes, of the Terraform state file.
	TerraformStateSizeBytes int64 `json:"terraform_state_size_bytes,omitempty"`

	// Number of variables in the bundle.
	VariableCount int64 `json:"variable_count,omitempty"`

	// NOTE(review): presumably the number of complex-typed variables — confirm.
	ComplexVariableCount int64 `json:"complex_variable_count,omitempty"`

	// NOTE(review): presumably the number of lookup variables — confirm.
	LookupVariableCount int64 `json:"lookup_variable_count,omitempty"`

	// Number of targets in the bundle.
	TargetCount int64 `json:"target_count,omitempty"`

	// Records whether a configuration field is set, as a list of key/value
	// entries. A field with no entry here is simply not tracked.
	// Each key is the full path of the field in the configuration tree,
	// e.g. "bundle.terraform.exec_path" or "bundle.git.branch".
	SetFields []BoolMapEntry `json:"set_fields,omitempty"`

	// Values of boolean configuration fields such as
	// `experimental.python_wheel_wrapper`. Writing them as entries here avoids
	// defining a dedicated proto for every boolean that is tracked.
	BoolValues []BoolMapEntry `json:"bool_values,omitempty"`

	BundleMode                BundleMode                   `json:"bundle_mode,omitempty"`
	WorkspaceArtifactPathType BundleDeployArtifactPathType `json:"workspace_artifact_path_type,omitempty"`

	// Execution time per mutator, for a selected subset of mutators.
	BundleMutatorExecutionTimeMs []IntMapEntry `json:"bundle_mutator_execution_time_ms,omitempty"`
}
// BoolMapEntry is a single string-keyed boolean entry. Both fields carry
// omitempty, so an empty Key and a false Value are dropped from the encoded
// JSON.
type BoolMapEntry struct {
	Key   string `json:"key,omitempty"`
	Value bool   `json:"value,omitempty"`
}
// IntMapEntry is a single string-keyed int64 entry. Both fields carry
// omitempty, so an empty Key and a zero Value are dropped from the encoded
// JSON.
type IntMapEntry struct {
	Key   string `json:"key,omitempty"`
	Value int64  `json:"value,omitempty"`
}

View File

@ -0,0 +1,37 @@
package protos
// BundleInitEvent is the telemetry payload for `databricks bundle init`.
type BundleInitEvent struct {
	// UUID associated with the DAB itself. It is serialized into the DAB when
	// a user runs `databricks bundle init`, so that all subsequent deployments
	// of that DAB can be associated with this init event.
	BundleUuid string `json:"bundle_uuid,omitempty"`

	// Name of the template that was initialized by `databricks bundle init`.
	// Only populated for first-party templates such as mlops-stacks or
	// default-python.
	TemplateName string `json:"template_name,omitempty"`

	// Arguments the user supplied to initialize the template. The Databricks
	// CLI only sets enum values here.
	//
	// The arguments are kept as generic key/value entries because a bundle
	// template's arguments are managed in the template itself; maintaining a
	// typed copy of that schema here would be untenable in the long term.
	TemplateEnumArgs []BundleInitTemplateEnumArg `json:"template_enum_args,omitempty"`
}
// BundleInitTemplateEnumArg is one key/value template argument recorded on a
// BundleInitEvent. Neither field carries omitempty, so both keys are always
// present in the encoded JSON.
type BundleInitTemplateEnumArg struct {
	// Key of the template argument. Valid keys are the ones listed in the
	// "properties" section of the `databricks_template_schema.json` file.
	//
	// Note: `databricks_template_schema.json` holds a JSON schema type
	// specification for the arguments that the template accepts.
	Key string `json:"key"`

	// Value the user chose for the argument. Only populated for properties
	// that declare the "enum" field in the JSON schema type specification.
	//
	// The Databricks CLI ensures the value is one of the "enum" values from
	// the template specification.
	Value string `json:"value"`
}

View File

@ -0,0 +1,44 @@
package protos
// ExecutionContext describes the CLI invocation a telemetry log belongs to.
// Every field carries omitempty, so zero values are left out of the encoded
// JSON.
type ExecutionContext struct {
	// UUID the CLI generates for every command run. The same value is set in
	// the HTTP user agent under the key "cmd-exec-id", which allows the
	// frontend_log table to be correlated with the http_access_log table.
	CmdExecId string `json:"cmd_exec_id,omitempty"`

	// Version of the Databricks CLI used.
	Version string `json:"version,omitempty"`

	// Command the user ran, e.g. bundle_deploy or fs_cp.
	Command string `json:"command,omitempty"`

	// Lowercase name of the operating system; the same value as
	// `runtime.GOOS` in Go.
	OperatingSystem string `json:"operating_system,omitempty"`

	// Version of DBR the CLI is being run from. Only set when the CLI runs on
	// a Databricks cluster.
	DbrVersion string `json:"dbr_version,omitempty"`

	// True when the CLI is being run from a Databricks notebook / cluster web
	// terminal.
	FromWebTerminal bool `json:"from_web_terminal,omitempty"`

	// Time taken for the CLI command to execute, in milliseconds per the
	// field name.
	ExecutionTimeMs int64 `json:"execution_time_ms,omitempty"`

	// Exit code of the CLI command. Because of omitempty, an exit code of 0
	// is not emitted.
	ExitCode int64 `json:"exit_code,omitempty"`
}
// Top level proto message for all structured telemetry logs that are generated client side by the Databricks CLI
// NOTE(review): the commented-out definition below looks superseded by the
// live DatabricksCliLog type declared elsewhere in this package — confirm and
// remove.
// type DatabricksCliLog struct {
// ExecutionContext ExecutionContext `json:"execution_context,omitempty"`
// CliLogEvent *CliTestEvent `json:"cli_test_event,omitempty"`
// BundleInitEvent *BundleInitEvent `json:"bundle_init_event,omitempty"`
// }

// CliTestEvent carries a single DummyCliEnum value, encoded under the "name"
// key and omitted when empty.
// NOTE(review): presumably only used to exercise the telemetry pipeline in
// tests — confirm.
type CliTestEvent struct {
	Name DummyCliEnum `json:"name,omitempty"`
}

View File

@ -0,0 +1,26 @@
package protos
// DummyCliEnum is a string-backed enum; it is the type of CliTestEvent.Name.
type DummyCliEnum string

// Values of DummyCliEnum.
const (
	DummyCliEnumUnspecified = DummyCliEnum("DUMMY_CLI_ENUM_UNSPECIFIED")
	DummyCliEnumValue1      = DummyCliEnum("VALUE1")
	DummyCliEnumValue2      = DummyCliEnum("VALUE2")
	DummyCliEnumValue3      = DummyCliEnum("VALUE3")
)
// BundleMode is a string-backed enum for the mode of a bundle.
type BundleMode string

// Values of BundleMode.
const (
	BundleModeUnspecified = BundleMode("TYPE_UNSPECIFIED")
	BundleModeDevelopment = BundleMode("DEVELOPMENT")
	BundleModeProduction  = BundleMode("PRODUCTION")
)
// BundleDeployArtifactPathType is a string-backed enum for the kind of
// location used as the workspace artifact path.
type BundleDeployArtifactPathType string

// Values of BundleDeployArtifactPathType.
const (
	BundleDeployArtifactPathTypeUnspecified = BundleDeployArtifactPathType("TYPE_UNSPECIFIED")
	BundleDeployArtifactPathTypeWorkspace   = BundleDeployArtifactPathType("WORKSPACE_FILE_SYSTEM")
	BundleDeployArtifactPathTypeVolume      = BundleDeployArtifactPathType("UC_VOLUME")
)

View File

@ -0,0 +1,21 @@
package protos
// FrontendLog is the top-level struct for any client-side logs at Databricks.
// It corresponds to the FrontendLog lumberjack proto in universe.
type FrontendLog struct {
	// A unique identifier for the log event generated from the CLI.
	FrontendLogEventID string `json:"frontend_log_event_id,omitempty"`

	// Entry is the log payload. It is a struct value, so the omitempty option
	// has no effect under encoding/json and the "entry" key is always emitted.
	Entry FrontendLogEntry `json:"entry,omitempty"`
}
// FrontendLogEntry is the type of FrontendLog.Entry and wraps the Databricks
// CLI log message.
type FrontendLogEntry struct {
	// DatabricksCliLog is a struct value, so the omitempty option has no
	// effect under encoding/json and the "databricks_cli_log" key is always
	// emitted.
	DatabricksCliLog DatabricksCliLog `json:"databricks_cli_log,omitempty"`
}
// DatabricksCliLog is the type of FrontendLogEntry.DatabricksCliLog. Each
// field is an optional pointer; nil pointers are omitted from the encoded
// JSON.
//
// NOTE(review): a BundleDeployEvent type exists in this package but has no
// field here — confirm whether it should be wired in.
type DatabricksCliLog struct {
	ExecutionContext *ExecutionContext `json:"execution_context,omitempty"`
	CliTestEvent     *CliTestEvent     `json:"cli_test_event,omitempty"`
	BundleInitEvent  *BundleInitEvent  `json:"bundle_init_event,omitempty"`
}