diff --git a/cmd/bundle/init.go b/cmd/bundle/init.go index 1911abe19..ce36762ed 100644 --- a/cmd/bundle/init.go +++ b/cmd/bundle/init.go @@ -6,7 +6,10 @@ import ( "github.com/databricks/cli/cmd/root" "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/log" + "github.com/databricks/cli/libs/telemetry" "github.com/databricks/cli/libs/template" + "github.com/databricks/databricks-sdk-go/client" "github.com/spf13/cobra" ) @@ -36,12 +39,28 @@ See https://docs.databricks.com/en/dev-tools/bundles/templates.html for more inf cmd.Flags().StringVar(&branch, "tag", "", "Git tag to use for template initialization") cmd.Flags().StringVar(&tag, "branch", "", "Git branch to use for template initialization") - cmd.PreRunE = root.MustWorkspaceClient - cmd.RunE = func(cmd *cobra.Command, args []string) error { - if tag != "" && branch != "" { - return errors.New("only one of --tag or --branch can be specified") + cmd.PreRunE = func(cmd *cobra.Command, args []string) error { + // Configure the logger to send telemetry to Databricks. + ctx := telemetry.WithDefaultLogger(cmd.Context()) + cmd.SetContext(ctx) + + return root.MustWorkspaceClient(cmd, args) + } + + cmd.PostRun = func(cmd *cobra.Command, args []string) { + ctx := cmd.Context() + w := root.WorkspaceClient(ctx) + apiClient, err := client.New(w.Config) + if err != nil { + // Uploading telemetry is best effort. Do not error. + log.Debugf(ctx, "Could not create API client to send telemetry using: %v", err) + return } + telemetry.Flush(cmd.Context(), apiClient) + } + + cmd.RunE = func(cmd *cobra.Command, args []string) error { var templatePathOrUrl string if len(args) > 0 { templatePathOrUrl = args[0] @@ -67,7 +86,14 @@ See https://docs.databricks.com/en/dev-tools/bundles/templates.html for more inf } defer tmpl.Reader.Cleanup(ctx) - return tmpl.Writer.Materialize(ctx, tmpl.Reader) + err = tmpl.Writer.Materialize(ctx, tmpl.Reader) + if err != nil { + return err + } + defer tmpl.Reader.Cleanup(ctx) + + tmpl.Writer.LogTelemetry(ctx) + return nil } return cmd } diff --git a/integration/bundle/helpers_test.go b/integration/bundle/helpers_test.go index b4f9c9086..986f71465 100644 --- a/integration/bundle/helpers_test.go +++ b/integration/bundle/helpers_test.go @@ -18,6 +18,7 @@ import ( "github.com/databricks/cli/libs/env" "github.com/databricks/cli/libs/flags" "github.com/databricks/cli/libs/folders" + "github.com/databricks/cli/libs/telemetry" "github.com/databricks/cli/libs/template" "github.com/databricks/databricks-sdk-go" "github.com/stretchr/testify/require" @@ -38,6 +39,7 @@ func initTestTemplateWithBundleRoot(t testutil.TestingT, ctx context.Context, te ctx = root.SetWorkspaceClient(ctx, nil) cmd := cmdio.NewIO(ctx, flags.OutputJSON, strings.NewReader(""), os.Stdout, os.Stderr, "", "bundles") ctx = cmdio.InContext(ctx, cmd) + ctx = telemetry.WithMockLogger(ctx) r := template.Resolver{ TemplatePathOrUrl: templateRoot, diff --git a/integration/bundle/init_test.go b/integration/bundle/init_test.go index 87a3e30e5..241213571 100644 --- a/integration/bundle/init_test.go +++ b/integration/bundle/init_test.go @@ -15,6 +15,8 @@ import ( "github.com/databricks/cli/internal/testcli" "github.com/databricks/cli/internal/testutil" "github.com/databricks/cli/libs/iamutil" + "github.com/databricks/cli/libs/telemetry" + "github.com/databricks/cli/libs/template" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -42,6 +44,9 @@ func TestBundleInitOnMlopsStacks(t *testing.T) { ctx, wt := acc.WorkspaceTest(t) w := wt.W + // Use mock logger to introspect the telemetry payload. + ctx = telemetry.WithMockLogger(ctx) + tmpDir1 := t.TempDir() tmpDir2 := t.TempDir() @@ -64,6 +69,29 @@ func TestBundleInitOnMlopsStacks(t *testing.T) { assert.NoFileExists(t, filepath.Join(tmpDir2, "repo_name", projectName, "README.md")) testcli.RequireSuccessfulRun(t, ctx, "bundle", "init", "mlops-stacks", "--output-dir", tmpDir2, "--config-file", filepath.Join(tmpDir1, "config.json")) + // Assert the telemetry payload is correctly logged. + telemetryEvents := telemetry.Introspect(ctx) + require.Len(t, telemetry.Introspect(ctx), 1) + event := telemetryEvents[0].BundleInitEvent + assert.Equal(t, "mlops-stacks", event.TemplateName) + + get := func(key string) string { + for _, v := range event.TemplateEnumArgs { + if v.Key == key { + return v.Value + } + } + return "" + } + + // Enum values should be present in the telemetry payload. + assert.Equal(t, "no", get("input_include_models_in_unity_catalog")) + assert.Equal(t, strings.ToLower(env), get("input_cloud")) + + // Freeform strings should not be present in the telemetry payload. + assert.Equal(t, "", get("input_project_name")) + assert.Equal(t, "", get("input_root_dir")) + // Assert that the README.md file was created contents := testutil.ReadFile(t, filepath.Join(tmpDir2, "repo_name", projectName, "README.md")) assert.Contains(t, contents, "# "+projectName) @@ -99,6 +127,156 @@ func TestBundleInitOnMlopsStacks(t *testing.T) { assert.Contains(t, job.Settings.Name, fmt.Sprintf("dev-%s-batch-inference-job", projectName)) } +func TestBundleInitTelemetryForDefaultTemplates(t *testing.T) { + projectName := testutil.RandomName("name_") + + tcases := []struct { + name string + args map[string]string + expectedArgs map[string]string + }{ + { + name: "dbt-sql", + args: map[string]string{ + "project_name": fmt.Sprintf("dbt-sql-%s", projectName), + "http_path": "/sql/1.0/warehouses/id", + "default_catalog": "abcd", + "personal_schemas": "yes, use a schema based on the current user name during development", + }, + expectedArgs: map[string]string{ + "personal_schemas": "yes, use a schema based on the current user name during development", + }, + }, + { + name: "default-python", + args: map[string]string{ + "project_name": fmt.Sprintf("default_python_%s", projectName), + "include_notebook": "yes", + "include_dlt": "yes", + "include_python": "no", + }, + expectedArgs: map[string]string{ + "include_notebook": "yes", + "include_dlt": "yes", + "include_python": "no", + }, + }, + { + name: "default-sql", + args: map[string]string{ + "project_name": fmt.Sprintf("sql_project_%s", projectName), + "http_path": "/sql/1.0/warehouses/id", + "default_catalog": "abcd", + "personal_schemas": "yes, automatically use a schema based on the current user name during development", + }, + expectedArgs: map[string]string{ + "personal_schemas": "yes, automatically use a schema based on the current user name during development", + }, + }, + } + + for _, tc := range tcases { + ctx, _ := acc.WorkspaceTest(t) + + // Use mock logger to introspect the telemetry payload. + ctx = telemetry.WithMockLogger(ctx) + + tmpDir1 := t.TempDir() + tmpDir2 := t.TempDir() + + // Create a config file with the project name and root dir + initConfig := tc.args + b, err := json.Marshal(initConfig) + require.NoError(t, err) + err = os.WriteFile(filepath.Join(tmpDir1, "config.json"), b, 0o644) + require.NoError(t, err) + + // Run bundle init + assert.NoDirExists(t, filepath.Join(tmpDir2, tc.args["project_name"])) + testcli.RequireSuccessfulRun(t, ctx, "bundle", "init", tc.name, "--output-dir", tmpDir2, "--config-file", filepath.Join(tmpDir1, "config.json")) + assert.DirExists(t, filepath.Join(tmpDir2, tc.args["project_name"])) + + // Assert the telemetry payload is correctly logged. + logs := telemetry.Introspect(ctx) + require.Len(t, logs, 1) + event := logs[0].BundleInitEvent + assert.Equal(t, event.TemplateName, tc.name) + + get := func(key string) string { + for _, v := range event.TemplateEnumArgs { + if v.Key == key { + return v.Value + } + } + return "" + } + + // Assert the template enum args are correctly logged. + assert.Len(t, event.TemplateEnumArgs, len(tc.expectedArgs)) + for k, v := range tc.expectedArgs { + assert.Equal(t, get(k), v) + } + } +} + +func TestBundleInitTelemetryForCustomTemplates(t *testing.T) { + ctx, _ := acc.WorkspaceTest(t) + + tmpDir1 := t.TempDir() + tmpDir2 := t.TempDir() + tmpDir3 := t.TempDir() + + err := os.Mkdir(filepath.Join(tmpDir1, "template"), 0o755) + require.NoError(t, err) + err = os.WriteFile(filepath.Join(tmpDir1, "template", "foo.txt.tmpl"), []byte("{{bundle_uuid}}"), 0o644) + require.NoError(t, err) + err = os.WriteFile(filepath.Join(tmpDir1, "databricks_template_schema.json"), []byte(` + { + "properties": { + "a": { + "description": "whatever", + "type": "string" + }, + "b": { + "description": "whatever", + "type": "string", + "enum": ["yes", "no"] + } + } + } + `), 0o644) + require.NoError(t, err) + + // Create a config file with the project name and root dir + initConfig := map[string]string{ + "a": "v1", + "b": "yes", + } + b, err := json.Marshal(initConfig) + require.NoError(t, err) + err = os.WriteFile(filepath.Join(tmpDir3, "config.json"), b, 0o644) + require.NoError(t, err) + + // Use mock logger to introspect the telemetry payload. + ctx = telemetry.WithMockLogger(ctx) + + // Run bundle init. + testcli.RequireSuccessfulRun(t, ctx, "bundle", "init", tmpDir1, "--output-dir", tmpDir2, "--config-file", filepath.Join(tmpDir3, "config.json")) + + // Assert the telemetry payload is correctly logged. For custom templates we should + // never set template_enum_args. + telemetryEvents := telemetry.Introspect(ctx) + require.Len(t, telemetryEvents, 1) + event := telemetryEvents[0].BundleInitEvent + assert.Equal(t, string(template.Custom), event.TemplateName) + assert.Empty(t, event.TemplateEnumArgs) + + // Ensure that the UUID returned by the `bundle_uuid` helper is the same UUID + // that's logged in the telemetry event. + fileC := testutil.ReadFile(t, filepath.Join(tmpDir2, "foo.txt")) + assert.Equal(t, event.Uuid, fileC) +} + func TestBundleInitHelpers(t *testing.T) { ctx, wt := acc.WorkspaceTest(t) w := wt.W diff --git a/libs/telemetry/protos/bundle_init.go b/libs/telemetry/protos/bundle_init.go index b02008314..4f8b4a38d 100644 --- a/libs/telemetry/protos/bundle_init.go +++ b/libs/telemetry/protos/bundle_init.go @@ -6,7 +6,7 @@ type BundleInitEvent struct { // UUID associated with the DAB itself. This is serialized into the DAB // when a user runs `databricks bundle init` and all subsequent deployments of // that DAB can then be associated with this init event. - Uuid string `json:"bundle_uuid,omitempty"` + Uuid string `json:"uuid,omitempty"` // Name of the template initialized when the user ran `databricks bundle init` // This is only populated when the template is a first party template like diff --git a/libs/template/config.go b/libs/template/config.go index 919ba2250..12dce7e60 100644 --- a/libs/template/config.go +++ b/libs/template/config.go @@ -5,6 +5,7 @@ import ( "errors" "fmt" "io/fs" + "slices" "github.com/databricks/cli/libs/cmdio" "github.com/databricks/cli/libs/jsonschema" @@ -273,3 +274,25 @@ func (c *config) validate() error { } return nil } + +// Return enum values selected by the user during template initialization. These +// values are safe to send over in telemetry events due to their limited cardinality. +func (c *config) enumValues() map[string]string { + res := map[string]string{} + for k, p := range c.schema.Properties { + // For now, we only send over string enum values since only those are + // being used in first party Databricks templates. + if p.Type != jsonschema.StringType { + continue + } + if p.Enum == nil { + continue + } + v := c.values[k] + + if slices.Contains(p.Enum, v) { + res[k] = v.(string) + } + } + return res +} diff --git a/libs/template/config_test.go b/libs/template/config_test.go index 515a0b9f5..3f971a862 100644 --- a/libs/template/config_test.go +++ b/libs/template/config_test.go @@ -564,3 +564,42 @@ func TestPromptIsSkippedAnyOf(t *testing.T) { assert.True(t, skip) assert.Equal(t, "hello-world", c.values["xyz"]) } + +func TestConfigEnumValues(t *testing.T) { + c := &config{ + schema: &jsonschema.Schema{ + Properties: map[string]*jsonschema.Schema{ + "a": { + Type: jsonschema.StringType, + }, + "b": { + Type: jsonschema.BooleanType, + }, + "c": { + Type: jsonschema.StringType, + Enum: []any{"v1", "v2"}, + }, + "d": { + Type: jsonschema.StringType, + Enum: []any{"v3", "v4"}, + }, + "e": { + Type: jsonschema.StringType, + Enum: []any{"v5", "v6"}, + }, + }, + }, + values: map[string]any{ + "a": "w1", + "b": false, + "c": "v1", + "d": "v3", + "e": "v7", + }, + } + + assert.Equal(t, map[string]string{ + "c": "v1", + "d": "v3", + }, c.enumValues()) +} diff --git a/libs/template/resolver.go b/libs/template/resolver.go index 2cc8bf1c7..adf3de15b 100644 --- a/libs/template/resolver.go +++ b/libs/template/resolver.go @@ -10,7 +10,6 @@ import ( var gitUrlPrefixes = []string{ "https://", - "git@", } func isRepoUrl(url string) bool { @@ -27,7 +26,6 @@ func isRepoUrl(url string) bool { type Resolver struct { // One of the following three: // 1. Path to a local template directory. - // 2. URL to a Git repository containing a template. // 3. Name of a built-in template. TemplatePathOrUrl string diff --git a/libs/template/writer.go b/libs/template/writer.go index e3d5af583..54d5693be 100644 --- a/libs/template/writer.go +++ b/libs/template/writer.go @@ -12,6 +12,8 @@ import ( "github.com/databricks/cli/libs/cmdio" "github.com/databricks/cli/libs/dbr" "github.com/databricks/cli/libs/filer" + "github.com/databricks/cli/libs/telemetry" + "github.com/databricks/cli/libs/telemetry/protos" ) const ( @@ -29,11 +31,15 @@ type Writer interface { // Materialize the template to the local file system. Materialize(ctx context.Context, r Reader) error + + // Log telemetry associated with the initialization of this template. + LogTelemetry(ctx context.Context) } type defaultWriter struct { - configPath string - outputFiler filer.Filer + configPath string + outputFiler filer.Filer + templateName TemplateName // Internal state config *config @@ -60,19 +66,19 @@ func constructOutputFiler(ctx context.Context, outputDir string) (filer.Filer, e return filer.NewLocalClient(outputDir) } -func (tmpl *defaultWriter) Configure(ctx context.Context, configPath, outputDir string) error { - tmpl.configPath = configPath +func (writer *defaultWriter) Configure(ctx context.Context, configPath, outputDir string) error { + writer.configPath = configPath outputFiler, err := constructOutputFiler(ctx, outputDir) if err != nil { return err } - tmpl.outputFiler = outputFiler + writer.outputFiler = outputFiler return nil } -func (tmpl *defaultWriter) promptForInput(ctx context.Context, reader Reader) error { +func (writer *defaultWriter) promptForInput(ctx context.Context, reader Reader) error { readerFs, err := reader.FS(ctx) if err != nil { return err @@ -81,29 +87,29 @@ func (tmpl *defaultWriter) promptForInput(ctx context.Context, reader Reader) er return fmt.Errorf("not a bundle template: expected to find a template schema file at %s", schemaFileName) } - tmpl.config, err = newConfig(ctx, readerFs, schemaFileName) + writer.config, err = newConfig(ctx, readerFs, schemaFileName) if err != nil { return err } // Read and assign config values from file - if tmpl.configPath != "" { - err = tmpl.config.assignValuesFromFile(tmpl.configPath) + if writer.configPath != "" { + err = writer.config.assignValuesFromFile(writer.configPath) if err != nil { return err } } helpers := loadHelpers(ctx) - tmpl.renderer, err = newRenderer(ctx, tmpl.config.values, helpers, readerFs, templateDirName, libraryDirName) + writer.renderer, err = newRenderer(ctx, writer.config.values, helpers, readerFs, templateDirName, libraryDirName) if err != nil { return err } // Print welcome message - welcome := tmpl.config.schema.WelcomeMessage + welcome := writer.config.schema.WelcomeMessage if welcome != "" { - welcome, err = tmpl.renderer.executeTemplate(welcome) + welcome, err = writer.renderer.executeTemplate(welcome) if err != nil { return err } @@ -112,21 +118,21 @@ func (tmpl *defaultWriter) promptForInput(ctx context.Context, reader Reader) er // Prompt user for any missing config values. Assign default values if // terminal is not TTY - err = tmpl.config.promptOrAssignDefaultValues(tmpl.renderer) + err = writer.config.promptOrAssignDefaultValues(writer.renderer) if err != nil { return err } - return tmpl.config.validate() + return writer.config.validate() } -func (tmpl *defaultWriter) printSuccessMessage(ctx context.Context) error { - success := tmpl.config.schema.SuccessMessage +func (writer *defaultWriter) printSuccessMessage(ctx context.Context) error { + success := writer.config.schema.SuccessMessage if success == "" { cmdio.LogString(ctx, "✨ Successfully initialized template") return nil } - success, err := tmpl.renderer.executeTemplate(success) + success, err := writer.renderer.executeTemplate(success) if err != nil { return err } @@ -134,38 +140,62 @@ func (tmpl *defaultWriter) printSuccessMessage(ctx context.Context) error { return nil } -func (tmpl *defaultWriter) Materialize(ctx context.Context, reader Reader) error { - err := tmpl.promptForInput(ctx, reader) +func (writer *defaultWriter) Materialize(ctx context.Context, reader Reader) error { + err := writer.promptForInput(ctx, reader) if err != nil { return err } // Walk the template file tree and compute in-memory representations of the // output files. - err = tmpl.renderer.walk() + err = writer.renderer.walk() if err != nil { return err } // Flush the output files to disk. - err = tmpl.renderer.persistToDisk(ctx, tmpl.outputFiler) + err = writer.renderer.persistToDisk(ctx, writer.outputFiler) if err != nil { return err } - return tmpl.printSuccessMessage(ctx) + return writer.printSuccessMessage(ctx) } -func (tmpl *defaultWriter) LogTelemetry(ctx context.Context) error { - // TODO, only log the template name and uuid. - return nil +func (writer *defaultWriter) LogTelemetry(ctx context.Context) { + // We don't log template enum args in the default template writer. + // This is because this writer can be used for customer templates, and we + // don't want to log PII. + event := telemetry.DatabricksCliLog{ + BundleInitEvent: &protos.BundleInitEvent{ + Uuid: bundleUuid, + TemplateName: string(writer.templateName), + }, + } + + telemetry.Log(ctx, event) } type writerWithFullTelemetry struct { defaultWriter } -func (tmpl *writerWithFullTelemetry) LogTelemetry(ctx context.Context) error { - // TODO, log template name, uuid and enum args as well. - return nil +func (writer *writerWithFullTelemetry) LogTelemetry(ctx context.Context) { + templateEnumArgs := []protos.BundleInitTemplateEnumArg{} + for k, v := range writer.config.enumValues() { + templateEnumArgs = append(templateEnumArgs, protos.BundleInitTemplateEnumArg{ + Key: k, + Value: v, + }) + } + + event := telemetry.DatabricksCliLog{ + BundleInitEvent: &protos.BundleInitEvent{ + Uuid: bundleUuid, + TemplateName: string(writer.templateName), + TemplateEnumArgs: templateEnumArgs, + }, + } + + telemetry.Log(ctx, event) } diff --git a/libs/template/writer_test.go b/libs/template/writer_test.go index 9d57966ee..3640efbd8 100644 --- a/libs/template/writer_test.go +++ b/libs/template/writer_test.go @@ -8,6 +8,9 @@ import ( "github.com/databricks/cli/cmd/root" "github.com/databricks/cli/libs/dbr" "github.com/databricks/cli/libs/filer" + "github.com/databricks/cli/libs/jsonschema" + "github.com/databricks/cli/libs/telemetry" + "github.com/databricks/cli/libs/telemetry/protos" "github.com/databricks/databricks-sdk-go" workspaceConfig "github.com/databricks/databricks-sdk-go/config" "github.com/stretchr/testify/assert" @@ -56,3 +59,56 @@ func TestMaterializeForNonTemplateDirectory(t *testing.T) { err = w.Materialize(ctx, &localReader{path: tmpDir2}) assert.EqualError(t, err, "not a bundle template: expected to find a template schema file at databricks_template_schema.json") } + +func TestDefaultWriterLogTelemetry(t *testing.T) { + ctx := telemetry.WithMockLogger(context.Background()) + w := &defaultWriter{templateName: Custom} + w.LogTelemetry(ctx) + + logs := telemetry.Introspect(ctx) + assert.Len(t, logs, 1) + assert.Equal(t, &protos.BundleInitEvent{ + TemplateName: string(Custom), + Uuid: bundleUuid, + }, logs[0].BundleInitEvent) +} + +func TestWriterWithFullTelemetry(t *testing.T) { + ctx := telemetry.WithMockLogger(context.Background()) + w := &writerWithFullTelemetry{ + defaultWriter: defaultWriter{ + templateName: DefaultPython, + config: &config{ + values: map[string]any{ + "foo": "v1", + "bar": "v2", + }, + schema: &jsonschema.Schema{ + Properties: map[string]*jsonschema.Schema{ + "foo": { + Type: jsonschema.StringType, + Enum: []any{"v1", "v2"}, + }, + "bar": { + Type: jsonschema.StringType, + }, + }, + }, + }, + }, + } + w.LogTelemetry(ctx) + + logs := telemetry.Introspect(ctx) + assert.Len(t, logs, 1) + assert.Equal(t, &protos.BundleInitEvent{ + TemplateName: string(DefaultPython), + TemplateEnumArgs: []protos.BundleInitTemplateEnumArg{ + { + Key: "foo", + Value: "v1", + }, + }, + Uuid: bundleUuid, + }, logs[0].BundleInitEvent) +}