From e4f088a65cb2e4be6b1f0a8cfd4d5e730e32869c Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Fri, 27 Dec 2024 11:35:04 +0530 Subject: [PATCH] add event proper --- cmd/bundle/init.go | 50 ++++++++-- cmd/bundle/init_test.go | 10 +- cmd/root/root.go | 17 ++++ integration/bundle/helpers_test.go | 10 +- libs/telemetry/events/bundle_init.go | 12 +-- libs/telemetry/logger.go | 28 +++--- libs/template/config.go | 13 +++ libs/template/materialize.go | 142 +++++++++++++++++++-------- libs/template/materialize_test.go | 10 +- 9 files changed, 213 insertions(+), 79 deletions(-) diff --git a/cmd/bundle/init.go b/cmd/bundle/init.go index 687c141ec..9b945df6b 100644 --- a/cmd/bundle/init.go +++ b/cmd/bundle/init.go @@ -101,16 +101,16 @@ func getNativeTemplateByDescription(description string) string { return "" } -func getUrlForNativeTemplate(name string) string { +func getNativeTemplateByName(name string) *nativeTemplate { for _, template := range nativeTemplates { if template.name == name { - return template.gitUrl + return &template } if slices.Contains(template.aliases, name) { - return template.gitUrl + return &template } } - return "" + return nil } func getFsForNativeTemplate(name string) (fs.FS, error) { @@ -235,10 +235,21 @@ See https://docs.databricks.com/en/dev-tools/bundles/templates.html for more inf return nil } - // Expand templatePath to a git URL if it's an alias for a known native template - // and we know it's git URL. - if gitUrl := getUrlForNativeTemplate(templatePath); gitUrl != "" { - templatePath = gitUrl + // If templatePath refers to a native template, store it's name in a new + // variable. + nt := getNativeTemplateByName(templatePath) + templateName := "custom" + isTemplateDatabricksOwned := false + if nt != nil { + templateName = templatePath + + // if we have a Git URL for the native template, expand templatePath + // to the full URL. + if nt.gitUrl != "" { + templatePath = nt.gitUrl + } + + isTemplateDatabricksOwned = true } if !isRepoUrl(templatePath) { @@ -256,9 +267,19 @@ See https://docs.databricks.com/en/dev-tools/bundles/templates.html for more inf templateFS = os.DirFS(templatePath) } + t := template.Template{ + TemplateOpts: template.TemplateOpts{ + ConfigFilePath: configFile, + TemplateFS: templateFS, + OutputFiler: outputFiler, + IsDatabricksOwned: isTemplateDatabricksOwned, + Name: templateName, + }, + } + // skip downloading the repo because input arg is not a URL. We assume // it's a path on the local file system in that case - return template.Materialize(ctx, configFile, templateFS, outputFiler) + return t.Materialize(ctx) } // Create a temporary directory with the name of the repository. The '*' @@ -283,7 +304,16 @@ See https://docs.databricks.com/en/dev-tools/bundles/templates.html for more inf // Clean up downloaded repository once the template is materialized. defer os.RemoveAll(repoDir) templateFS := os.DirFS(filepath.Join(repoDir, templateDir)) - return template.Materialize(ctx, configFile, templateFS, outputFiler) + t := template.Template{ + TemplateOpts: template.TemplateOpts{ + ConfigFilePath: configFile, + TemplateFS: templateFS, + OutputFiler: outputFiler, + IsDatabricksOwned: isTemplateDatabricksOwned, + Name: templateName, + }, + } + return t.Materialize(ctx) } return cmd } diff --git a/cmd/bundle/init_test.go b/cmd/bundle/init_test.go index 475b2e149..6d2529273 100644 --- a/cmd/bundle/init_test.go +++ b/cmd/bundle/init_test.go @@ -46,9 +46,9 @@ func TestNativeTemplateHelpDescriptions(t *testing.T) { assert.Equal(t, expected, nativeTemplateHelpDescriptions()) } -func TestGetUrlForNativeTemplate(t *testing.T) { - assert.Equal(t, "https://github.com/databricks/mlops-stacks", getUrlForNativeTemplate("mlops-stacks")) - assert.Equal(t, "https://github.com/databricks/mlops-stacks", getUrlForNativeTemplate("mlops-stack")) - assert.Equal(t, "", getUrlForNativeTemplate("default-python")) - assert.Equal(t, "", getUrlForNativeTemplate("invalid")) +func TestGetNativeTemplateByName(t *testing.T) { + assert.Equal(t, "https://github.com/databricks/mlops-stacks", getNativeTemplateByName("mlops-stacks").gitUrl) + assert.Equal(t, "https://github.com/databricks/mlops-stacks", getNativeTemplateByName("mlops-stack").gitUrl) + assert.Equal(t, "", getNativeTemplateByName("default-python").gitUrl) + assert.Nil(t, nil, getNativeTemplateByName("invalid")) } diff --git a/cmd/root/root.go b/cmd/root/root.go index 3b37d0176..20079a0bb 100644 --- a/cmd/root/root.go +++ b/cmd/root/root.go @@ -12,6 +12,8 @@ import ( "github.com/databricks/cli/libs/cmdio" "github.com/databricks/cli/libs/dbr" "github.com/databricks/cli/libs/log" + "github.com/databricks/cli/libs/telemetry" + "github.com/databricks/databricks-sdk-go/client" "github.com/spf13/cobra" ) @@ -52,6 +54,9 @@ func New(ctx context.Context) *cobra.Command { return err } + // Configure the logger to send telemetry to Databricks. + ctx = telemetry.NewContext(ctx) + logger := log.GetLogger(ctx) logger.Info("start", slog.String("version", build.GetInfo().Version), @@ -84,6 +89,18 @@ func New(ctx context.Context) *cobra.Command { return nil } + cmd.PersistentPostRun = func(cmd *cobra.Command, args []string) { + ctx := cmd.Context() + w := WorkspaceClient(ctx) + apiClient, err := client.New(w.Config) + if err != nil { + // Uploading telemetry is best effort. Do not error. + return + } + + telemetry.Flush(cmd.Context(), apiClient) + } + cmd.SetFlagErrorFunc(flagErrorFunc) cmd.SetVersionTemplate("Databricks CLI v{{.Version}}\n") return cmd diff --git a/integration/bundle/helpers_test.go b/integration/bundle/helpers_test.go index e884cd8c6..84c8364dd 100644 --- a/integration/bundle/helpers_test.go +++ b/integration/bundle/helpers_test.go @@ -42,7 +42,15 @@ func initTestTemplateWithBundleRoot(t testutil.TestingT, ctx context.Context, te out, err := filer.NewLocalClient(bundleRoot) require.NoError(t, err) - err = template.Materialize(ctx, configFilePath, os.DirFS(templateRoot), out) + tmpl := template.Template{ + TemplateOpts: template.TemplateOpts{ + ConfigFilePath: configFilePath, + TemplateFS: os.DirFS(templateRoot), + OutputFiler: out, + }, + } + + err = tmpl.Materialize(ctx) require.NoError(t, err) return bundleRoot } diff --git a/libs/telemetry/events/bundle_init.go b/libs/telemetry/events/bundle_init.go index cb303abe7..38be47f60 100644 --- a/libs/telemetry/events/bundle_init.go +++ b/libs/telemetry/events/bundle_init.go @@ -11,7 +11,7 @@ type BundleInitEvent struct { // Name of the template initialized when the user ran `databricks bundle init` // This is only populated when the template is a first party template like // mlops-stacks or default-python. - TemplateName BundleTemplate `json:"template_name,omitempty"` + TemplateName string `json:"template_name,omitempty"` // Arguments used by the user to initialize the template. Only enum // values will be set here by the Databricks CLI. @@ -21,13 +21,3 @@ type BundleInitEvent struct { // will be untenable in the long term. TemplateEnumArgs map[string]string `json:"template_enum_args,omitempty"` } - -type BundleTemplate string - -const ( - BundleTemplateMlopsStacks BundleTemplate = "mlops-stacks" - BundleTemplateDefaultPython BundleTemplate = "default-python" - BundleTemplateDefaultSql BundleTemplate = "default-sql" - BundleTemplateDbtSql BundleTemplate = "dbt-sql" - BundleTemplateCustom BundleTemplate = "custom" -) diff --git a/libs/telemetry/logger.go b/libs/telemetry/logger.go index b1dd799b4..844d56b84 100644 --- a/libs/telemetry/logger.go +++ b/libs/telemetry/logger.go @@ -7,9 +7,7 @@ import ( "net/http" "time" - "github.com/databricks/cli/cmd/root" "github.com/databricks/cli/libs/log" - "github.com/databricks/databricks-sdk-go/client" "github.com/google/uuid" ) @@ -58,18 +56,24 @@ func Flush(ctx context.Context, apiClient DatabricksApiClient) { defer cancel() l := fromContext(ctx) - // We pass the API client as an arg to mock it in unit tests. - if apiClient == nil { - var err error - - // Create API client to make the the telemetry API call. - apiClient, err = client.New(root.WorkspaceClient(ctx).Config) - if err != nil { - log.Debugf(ctx, "error creating API client for telemetry: %v", err) - return - } + if len(l.protoLogs) == 0 { + log.Debugf(ctx, "No telemetry events to flush") + return } + // We pass the API client as an arg to mock it in unit tests. + // TODO: Cleanup and remove this section. + // if apiClient == nil { + // var err error + + // // Create API client to make the the telemetry API call. + // apiClient, err = client.New(root.WorkspaceClient(ctx).Config) + // if err != nil { + // log.Debugf(ctx, "error creating API client for telemetry: %v", err) + // return + // } + // } + resp := &ResponseBody{} for { select { diff --git a/libs/template/config.go b/libs/template/config.go index 8e7695b91..251d3e9ae 100644 --- a/libs/template/config.go +++ b/libs/template/config.go @@ -273,3 +273,16 @@ func (c *config) validate() error { } return nil } + +// Return enum values selected by the user during template initialization. These +// values are safe to send over in telemetry events due to their limited cardinality. +func (c *config) enumValues() map[string]string { + res := map[string]string{} + for k, p := range c.schema.Properties { + if p.Enum == nil { + continue + } + res[k] = c.values[k].(string) + } + return res +} diff --git a/libs/template/materialize.go b/libs/template/materialize.go index 86a6a8c37..1590199d8 100644 --- a/libs/template/materialize.go +++ b/libs/template/materialize.go @@ -8,6 +8,8 @@ import ( "github.com/databricks/cli/libs/cmdio" "github.com/databricks/cli/libs/filer" + "github.com/databricks/cli/libs/telemetry" + "github.com/databricks/cli/libs/telemetry/events" ) const ( @@ -16,42 +18,65 @@ const ( schemaFileName = "databricks_template_schema.json" ) -// This function materializes the input templates as a project, using user defined -// configurations. -// Parameters: -// -// ctx: context containing a cmdio object. This is used to prompt the user -// configFilePath: file path containing user defined config values -// templateFS: root of the template definition -// outputFiler: filer to use for writing the initialized template -func Materialize(ctx context.Context, configFilePath string, templateFS fs.FS, outputFiler filer.Filer) error { - if _, err := fs.Stat(templateFS, schemaFileName); errors.Is(err, fs.ErrNotExist) { +type TemplateOpts struct { + // file path containing user defined config values + ConfigFilePath string + // root of the template definition + TemplateFS fs.FS + // filer to use for writing the initialized template + OutputFiler filer.Filer + // If true, we'll include the enum template args in the telemetry payload.. + IsDatabricksOwned bool + // Name of the template. For non-Databricks owned templates, this is set to + // custom. + // TODO: move this enum to the template package. + Name string +} + +type Template struct { + TemplateOpts + + // internal object used to prompt user for config values and store them. + config *config + + // internal object user to render the template. + renderer *renderer +} + +// This function resolves input to use to materialize the template in two steps. +// 1. First, this function loads any user specified input configuration if the user +// has provided a config file path. +// 2. For any values that are required by the template but not provided in the config +// file, this function prompts the user for them. +func (t *Template) resolveTemplateInput(ctx context.Context) error { + if _, err := fs.Stat(t.TemplateFS, schemaFileName); errors.Is(err, fs.ErrNotExist) { return fmt.Errorf("not a bundle template: expected to find a template schema file at %s", schemaFileName) } - config, err := newConfig(ctx, templateFS, schemaFileName) + var err error + t.config, err = newConfig(ctx, t.TemplateFS, schemaFileName) if err != nil { return err } // Read and assign config values from file - if configFilePath != "" { - err = config.assignValuesFromFile(configFilePath) + if t.ConfigFilePath != "" { + err = t.config.assignValuesFromFile(t.ConfigFilePath) if err != nil { return err } } helpers := loadHelpers(ctx) - r, err := newRenderer(ctx, config.values, helpers, templateFS, templateDirName, libraryDirName) + t.renderer, err = newRenderer(ctx, t.config.values, helpers, t.TemplateFS, templateDirName, libraryDirName) if err != nil { return err } // Print welcome message - welcome := config.schema.WelcomeMessage + welcome := t.config.schema.WelcomeMessage if welcome != "" { - welcome, err = r.executeTemplate(welcome) + welcome, err = t.renderer.executeTemplate(welcome) if err != nil { return err } @@ -60,35 +85,74 @@ func Materialize(ctx context.Context, configFilePath string, templateFS fs.FS, o // Prompt user for any missing config values. Assign default values if // terminal is not TTY - err = config.promptOrAssignDefaultValues(r) - if err != nil { - return err - } - err = config.validate() + err = t.config.promptOrAssignDefaultValues(t.renderer) if err != nil { return err } + return t.config.validate() +} - // Walk and render the template, since input configuration is complete - err = r.walk() - if err != nil { - return err - } - - err = r.persistToDisk(ctx, outputFiler) - if err != nil { - return err - } - - success := config.schema.SuccessMessage +func (t *Template) printSuccessMessage(ctx context.Context) error { + success := t.config.schema.SuccessMessage if success == "" { cmdio.LogString(ctx, "✨ Successfully initialized template") - } else { - success, err = r.executeTemplate(success) - if err != nil { - return err - } - cmdio.LogString(ctx, success) + return nil } + + success, err := t.renderer.executeTemplate(success) + if err != nil { + return err + } + cmdio.LogString(ctx, success) return nil } + +func (t *Template) logTelemetry(ctx context.Context) error { + // Only log telemetry input for Databricks owned templates. This is to prevent + // accidentally collecting PUII from custom user templates. + templateEnumArgs := map[string]string{} + if !t.IsDatabricksOwned { + templateEnumArgs = t.config.enumValues() + } + + event := telemetry.DatabricksCliLog{ + BundleInitEvent: &events.BundleInitEvent{ + Uuid: bundleUuid, + TemplateName: t.Name, + TemplateEnumArgs: templateEnumArgs, + }, + } + + return telemetry.Log(ctx, telemetry.FrontendLogEntry{ + DatabricksCliLog: event, + }) +} + +// This function materializes the input templates as a project, using user defined +// configurations. +func (t *Template) Materialize(ctx context.Context) error { + err := t.resolveTemplateInput(ctx) + if err != nil { + return err + } + + // Walk the template file tree and compute in-memory representations of the + // output files. + err = t.renderer.walk() + if err != nil { + return err + } + + // Flush the output files to disk. + err = t.renderer.persistToDisk(ctx, t.OutputFiler) + if err != nil { + return err + } + + err = t.printSuccessMessage(ctx) + if err != nil { + return err + } + + return t.logTelemetry(ctx) +} diff --git a/libs/template/materialize_test.go b/libs/template/materialize_test.go index f7cd916e3..c0ec79484 100644 --- a/libs/template/materialize_test.go +++ b/libs/template/materialize_test.go @@ -18,7 +18,15 @@ func TestMaterializeForNonTemplateDirectory(t *testing.T) { require.NoError(t, err) ctx := root.SetWorkspaceClient(context.Background(), w) + tmpl := Template{ + TemplateOpts: TemplateOpts{ + ConfigFilePath: "", + TemplateFS: os.DirFS(tmpDir), + OutputFiler: nil, + }, + } + // Try to materialize a non-template directory. - err = Materialize(ctx, "", os.DirFS(tmpDir), nil) + err = tmpl.Materialize(ctx) assert.EqualError(t, err, fmt.Sprintf("not a bundle template: expected to find a template schema file at %s", schemaFileName)) }