squash all commits

This commit is contained in:
Shreyas Goenka 2025-01-20 18:15:29 +01:00
parent ab10720027
commit 38bacc58b0
No known key found for this signature in database
GPG Key ID: 92A07DF49CCB0622
9 changed files with 388 additions and 36 deletions

View File

@ -6,7 +6,10 @@ import (
"github.com/databricks/cli/cmd/root"
"github.com/databricks/cli/libs/cmdio"
"github.com/databricks/cli/libs/log"
"github.com/databricks/cli/libs/telemetry"
"github.com/databricks/cli/libs/template"
"github.com/databricks/databricks-sdk-go/client"
"github.com/spf13/cobra"
)
@ -36,12 +39,28 @@ See https://docs.databricks.com/en/dev-tools/bundles/templates.html for more inf
cmd.Flags().StringVar(&branch, "tag", "", "Git tag to use for template initialization")
cmd.Flags().StringVar(&tag, "branch", "", "Git branch to use for template initialization")
cmd.PreRunE = root.MustWorkspaceClient
cmd.RunE = func(cmd *cobra.Command, args []string) error {
if tag != "" && branch != "" {
return errors.New("only one of --tag or --branch can be specified")
cmd.PreRunE = func(cmd *cobra.Command, args []string) error {
// Configure the logger to send telemetry to Databricks.
ctx := telemetry.WithDefaultLogger(cmd.Context())
cmd.SetContext(ctx)
return root.MustWorkspaceClient(cmd, args)
}
cmd.PostRun = func(cmd *cobra.Command, args []string) {
ctx := cmd.Context()
w := root.WorkspaceClient(ctx)
apiClient, err := client.New(w.Config)
if err != nil {
// Uploading telemetry is best effort. Do not error.
log.Debugf(ctx, "Could not create API client to send telemetry using: %v", err)
return
}
telemetry.Flush(cmd.Context(), apiClient)
}
cmd.RunE = func(cmd *cobra.Command, args []string) error {
var templatePathOrUrl string
if len(args) > 0 {
templatePathOrUrl = args[0]
@ -67,7 +86,14 @@ See https://docs.databricks.com/en/dev-tools/bundles/templates.html for more inf
}
defer tmpl.Reader.Cleanup(ctx)
return tmpl.Writer.Materialize(ctx, tmpl.Reader)
err = tmpl.Writer.Materialize(ctx, tmpl.Reader)
if err != nil {
return err
}
defer tmpl.Reader.Cleanup(ctx)
tmpl.Writer.LogTelemetry(ctx)
return nil
}
return cmd
}

View File

@ -18,6 +18,7 @@ import (
"github.com/databricks/cli/libs/env"
"github.com/databricks/cli/libs/flags"
"github.com/databricks/cli/libs/folders"
"github.com/databricks/cli/libs/telemetry"
"github.com/databricks/cli/libs/template"
"github.com/databricks/databricks-sdk-go"
"github.com/stretchr/testify/require"
@ -38,6 +39,7 @@ func initTestTemplateWithBundleRoot(t testutil.TestingT, ctx context.Context, te
ctx = root.SetWorkspaceClient(ctx, nil)
cmd := cmdio.NewIO(ctx, flags.OutputJSON, strings.NewReader(""), os.Stdout, os.Stderr, "", "bundles")
ctx = cmdio.InContext(ctx, cmd)
ctx = telemetry.WithMockLogger(ctx)
r := template.Resolver{
TemplatePathOrUrl: templateRoot,

View File

@ -15,6 +15,8 @@ import (
"github.com/databricks/cli/internal/testcli"
"github.com/databricks/cli/internal/testutil"
"github.com/databricks/cli/libs/iamutil"
"github.com/databricks/cli/libs/telemetry"
"github.com/databricks/cli/libs/template"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
@ -42,6 +44,9 @@ func TestBundleInitOnMlopsStacks(t *testing.T) {
ctx, wt := acc.WorkspaceTest(t)
w := wt.W
// Use mock logger to introspect the telemetry payload.
ctx = telemetry.WithMockLogger(ctx)
tmpDir1 := t.TempDir()
tmpDir2 := t.TempDir()
@ -64,6 +69,29 @@ func TestBundleInitOnMlopsStacks(t *testing.T) {
assert.NoFileExists(t, filepath.Join(tmpDir2, "repo_name", projectName, "README.md"))
testcli.RequireSuccessfulRun(t, ctx, "bundle", "init", "mlops-stacks", "--output-dir", tmpDir2, "--config-file", filepath.Join(tmpDir1, "config.json"))
// Assert the telemetry payload is correctly logged.
telemetryEvents := telemetry.Introspect(ctx)
require.Len(t, telemetry.Introspect(ctx), 1)
event := telemetryEvents[0].BundleInitEvent
assert.Equal(t, "mlops-stacks", event.TemplateName)
get := func(key string) string {
for _, v := range event.TemplateEnumArgs {
if v.Key == key {
return v.Value
}
}
return ""
}
// Enum values should be present in the telemetry payload.
assert.Equal(t, "no", get("input_include_models_in_unity_catalog"))
assert.Equal(t, strings.ToLower(env), get("input_cloud"))
// Freeform strings should not be present in the telemetry payload.
assert.Equal(t, "", get("input_project_name"))
assert.Equal(t, "", get("input_root_dir"))
// Assert that the README.md file was created
contents := testutil.ReadFile(t, filepath.Join(tmpDir2, "repo_name", projectName, "README.md"))
assert.Contains(t, contents, "# "+projectName)
@ -99,6 +127,156 @@ func TestBundleInitOnMlopsStacks(t *testing.T) {
assert.Contains(t, job.Settings.Name, fmt.Sprintf("dev-%s-batch-inference-job", projectName))
}
func TestBundleInitTelemetryForDefaultTemplates(t *testing.T) {
projectName := testutil.RandomName("name_")
tcases := []struct {
name string
args map[string]string
expectedArgs map[string]string
}{
{
name: "dbt-sql",
args: map[string]string{
"project_name": fmt.Sprintf("dbt-sql-%s", projectName),
"http_path": "/sql/1.0/warehouses/id",
"default_catalog": "abcd",
"personal_schemas": "yes, use a schema based on the current user name during development",
},
expectedArgs: map[string]string{
"personal_schemas": "yes, use a schema based on the current user name during development",
},
},
{
name: "default-python",
args: map[string]string{
"project_name": fmt.Sprintf("default_python_%s", projectName),
"include_notebook": "yes",
"include_dlt": "yes",
"include_python": "no",
},
expectedArgs: map[string]string{
"include_notebook": "yes",
"include_dlt": "yes",
"include_python": "no",
},
},
{
name: "default-sql",
args: map[string]string{
"project_name": fmt.Sprintf("sql_project_%s", projectName),
"http_path": "/sql/1.0/warehouses/id",
"default_catalog": "abcd",
"personal_schemas": "yes, automatically use a schema based on the current user name during development",
},
expectedArgs: map[string]string{
"personal_schemas": "yes, automatically use a schema based on the current user name during development",
},
},
}
for _, tc := range tcases {
ctx, _ := acc.WorkspaceTest(t)
// Use mock logger to introspect the telemetry payload.
ctx = telemetry.WithMockLogger(ctx)
tmpDir1 := t.TempDir()
tmpDir2 := t.TempDir()
// Create a config file with the project name and root dir
initConfig := tc.args
b, err := json.Marshal(initConfig)
require.NoError(t, err)
err = os.WriteFile(filepath.Join(tmpDir1, "config.json"), b, 0o644)
require.NoError(t, err)
// Run bundle init
assert.NoDirExists(t, filepath.Join(tmpDir2, tc.args["project_name"]))
testcli.RequireSuccessfulRun(t, ctx, "bundle", "init", tc.name, "--output-dir", tmpDir2, "--config-file", filepath.Join(tmpDir1, "config.json"))
assert.DirExists(t, filepath.Join(tmpDir2, tc.args["project_name"]))
// Assert the telemetry payload is correctly logged.
logs := telemetry.Introspect(ctx)
require.Len(t, logs, 1)
event := logs[0].BundleInitEvent
assert.Equal(t, event.TemplateName, tc.name)
get := func(key string) string {
for _, v := range event.TemplateEnumArgs {
if v.Key == key {
return v.Value
}
}
return ""
}
// Assert the template enum args are correctly logged.
assert.Len(t, event.TemplateEnumArgs, len(tc.expectedArgs))
for k, v := range tc.expectedArgs {
assert.Equal(t, get(k), v)
}
}
}
func TestBundleInitTelemetryForCustomTemplates(t *testing.T) {
ctx, _ := acc.WorkspaceTest(t)
tmpDir1 := t.TempDir()
tmpDir2 := t.TempDir()
tmpDir3 := t.TempDir()
err := os.Mkdir(filepath.Join(tmpDir1, "template"), 0o755)
require.NoError(t, err)
err = os.WriteFile(filepath.Join(tmpDir1, "template", "foo.txt.tmpl"), []byte("{{bundle_uuid}}"), 0o644)
require.NoError(t, err)
err = os.WriteFile(filepath.Join(tmpDir1, "databricks_template_schema.json"), []byte(`
{
"properties": {
"a": {
"description": "whatever",
"type": "string"
},
"b": {
"description": "whatever",
"type": "string",
"enum": ["yes", "no"]
}
}
}
`), 0o644)
require.NoError(t, err)
// Create a config file with the project name and root dir
initConfig := map[string]string{
"a": "v1",
"b": "yes",
}
b, err := json.Marshal(initConfig)
require.NoError(t, err)
err = os.WriteFile(filepath.Join(tmpDir3, "config.json"), b, 0o644)
require.NoError(t, err)
// Use mock logger to introspect the telemetry payload.
ctx = telemetry.WithMockLogger(ctx)
// Run bundle init.
testcli.RequireSuccessfulRun(t, ctx, "bundle", "init", tmpDir1, "--output-dir", tmpDir2, "--config-file", filepath.Join(tmpDir3, "config.json"))
// Assert the telemetry payload is correctly logged. For custom templates we should
// never set template_enum_args.
telemetryEvents := telemetry.Introspect(ctx)
require.Len(t, telemetryEvents, 1)
event := telemetryEvents[0].BundleInitEvent
assert.Equal(t, string(template.Custom), event.TemplateName)
assert.Empty(t, event.TemplateEnumArgs)
// Ensure that the UUID returned by the `bundle_uuid` helper is the same UUID
// that's logged in the telemetry event.
fileC := testutil.ReadFile(t, filepath.Join(tmpDir2, "foo.txt"))
assert.Equal(t, event.Uuid, fileC)
}
func TestBundleInitHelpers(t *testing.T) {
ctx, wt := acc.WorkspaceTest(t)
w := wt.W

View File

@ -6,7 +6,7 @@ type BundleInitEvent struct {
// UUID associated with the DAB itself. This is serialized into the DAB
// when a user runs `databricks bundle init` and all subsequent deployments of
// that DAB can then be associated with this init event.
Uuid string `json:"bundle_uuid,omitempty"`
Uuid string `json:"uuid,omitempty"`
// Name of the template initialized when the user ran `databricks bundle init`
// This is only populated when the template is a first party template like

View File

@ -5,6 +5,7 @@ import (
"errors"
"fmt"
"io/fs"
"slices"
"github.com/databricks/cli/libs/cmdio"
"github.com/databricks/cli/libs/jsonschema"
@ -273,3 +274,25 @@ func (c *config) validate() error {
}
return nil
}
// Return enum values selected by the user during template initialization. These
// values are safe to send over in telemetry events due to their limited cardinality.
func (c *config) enumValues() map[string]string {
res := map[string]string{}
for k, p := range c.schema.Properties {
// For now, we only send over string enum values since only those are
// being used in first party Databricks templates.
if p.Type != jsonschema.StringType {
continue
}
if p.Enum == nil {
continue
}
v := c.values[k]
if slices.Contains(p.Enum, v) {
res[k] = v.(string)
}
}
return res
}

View File

@ -564,3 +564,42 @@ func TestPromptIsSkippedAnyOf(t *testing.T) {
assert.True(t, skip)
assert.Equal(t, "hello-world", c.values["xyz"])
}
func TestConfigEnumValues(t *testing.T) {
c := &config{
schema: &jsonschema.Schema{
Properties: map[string]*jsonschema.Schema{
"a": {
Type: jsonschema.StringType,
},
"b": {
Type: jsonschema.BooleanType,
},
"c": {
Type: jsonschema.StringType,
Enum: []any{"v1", "v2"},
},
"d": {
Type: jsonschema.StringType,
Enum: []any{"v3", "v4"},
},
"e": {
Type: jsonschema.StringType,
Enum: []any{"v5", "v6"},
},
},
},
values: map[string]any{
"a": "w1",
"b": false,
"c": "v1",
"d": "v3",
"e": "v7",
},
}
assert.Equal(t, map[string]string{
"c": "v1",
"d": "v3",
}, c.enumValues())
}

View File

@ -10,7 +10,6 @@ import (
var gitUrlPrefixes = []string{
"https://",
"git@",
}
func isRepoUrl(url string) bool {
@ -27,7 +26,6 @@ func isRepoUrl(url string) bool {
type Resolver struct {
// One of the following three:
// 1. Path to a local template directory.
// 2. URL to a Git repository containing a template.
// 3. Name of a built-in template.
TemplatePathOrUrl string

View File

@ -12,6 +12,8 @@ import (
"github.com/databricks/cli/libs/cmdio"
"github.com/databricks/cli/libs/dbr"
"github.com/databricks/cli/libs/filer"
"github.com/databricks/cli/libs/telemetry"
"github.com/databricks/cli/libs/telemetry/protos"
)
const (
@ -29,11 +31,15 @@ type Writer interface {
// Materialize the template to the local file system.
Materialize(ctx context.Context, r Reader) error
// Log telemetry associated with the initialization of this template.
LogTelemetry(ctx context.Context)
}
type defaultWriter struct {
configPath string
outputFiler filer.Filer
configPath string
outputFiler filer.Filer
templateName TemplateName
// Internal state
config *config
@ -60,19 +66,19 @@ func constructOutputFiler(ctx context.Context, outputDir string) (filer.Filer, e
return filer.NewLocalClient(outputDir)
}
func (tmpl *defaultWriter) Configure(ctx context.Context, configPath, outputDir string) error {
tmpl.configPath = configPath
func (writer *defaultWriter) Configure(ctx context.Context, configPath, outputDir string) error {
writer.configPath = configPath
outputFiler, err := constructOutputFiler(ctx, outputDir)
if err != nil {
return err
}
tmpl.outputFiler = outputFiler
writer.outputFiler = outputFiler
return nil
}
func (tmpl *defaultWriter) promptForInput(ctx context.Context, reader Reader) error {
func (writer *defaultWriter) promptForInput(ctx context.Context, reader Reader) error {
readerFs, err := reader.FS(ctx)
if err != nil {
return err
@ -81,29 +87,29 @@ func (tmpl *defaultWriter) promptForInput(ctx context.Context, reader Reader) er
return fmt.Errorf("not a bundle template: expected to find a template schema file at %s", schemaFileName)
}
tmpl.config, err = newConfig(ctx, readerFs, schemaFileName)
writer.config, err = newConfig(ctx, readerFs, schemaFileName)
if err != nil {
return err
}
// Read and assign config values from file
if tmpl.configPath != "" {
err = tmpl.config.assignValuesFromFile(tmpl.configPath)
if writer.configPath != "" {
err = writer.config.assignValuesFromFile(writer.configPath)
if err != nil {
return err
}
}
helpers := loadHelpers(ctx)
tmpl.renderer, err = newRenderer(ctx, tmpl.config.values, helpers, readerFs, templateDirName, libraryDirName)
writer.renderer, err = newRenderer(ctx, writer.config.values, helpers, readerFs, templateDirName, libraryDirName)
if err != nil {
return err
}
// Print welcome message
welcome := tmpl.config.schema.WelcomeMessage
welcome := writer.config.schema.WelcomeMessage
if welcome != "" {
welcome, err = tmpl.renderer.executeTemplate(welcome)
welcome, err = writer.renderer.executeTemplate(welcome)
if err != nil {
return err
}
@ -112,21 +118,21 @@ func (tmpl *defaultWriter) promptForInput(ctx context.Context, reader Reader) er
// Prompt user for any missing config values. Assign default values if
// terminal is not TTY
err = tmpl.config.promptOrAssignDefaultValues(tmpl.renderer)
err = writer.config.promptOrAssignDefaultValues(writer.renderer)
if err != nil {
return err
}
return tmpl.config.validate()
return writer.config.validate()
}
func (tmpl *defaultWriter) printSuccessMessage(ctx context.Context) error {
success := tmpl.config.schema.SuccessMessage
func (writer *defaultWriter) printSuccessMessage(ctx context.Context) error {
success := writer.config.schema.SuccessMessage
if success == "" {
cmdio.LogString(ctx, "✨ Successfully initialized template")
return nil
}
success, err := tmpl.renderer.executeTemplate(success)
success, err := writer.renderer.executeTemplate(success)
if err != nil {
return err
}
@ -134,38 +140,62 @@ func (tmpl *defaultWriter) printSuccessMessage(ctx context.Context) error {
return nil
}
func (tmpl *defaultWriter) Materialize(ctx context.Context, reader Reader) error {
err := tmpl.promptForInput(ctx, reader)
func (writer *defaultWriter) Materialize(ctx context.Context, reader Reader) error {
err := writer.promptForInput(ctx, reader)
if err != nil {
return err
}
// Walk the template file tree and compute in-memory representations of the
// output files.
err = tmpl.renderer.walk()
err = writer.renderer.walk()
if err != nil {
return err
}
// Flush the output files to disk.
err = tmpl.renderer.persistToDisk(ctx, tmpl.outputFiler)
err = writer.renderer.persistToDisk(ctx, writer.outputFiler)
if err != nil {
return err
}
return tmpl.printSuccessMessage(ctx)
return writer.printSuccessMessage(ctx)
}
func (tmpl *defaultWriter) LogTelemetry(ctx context.Context) error {
// TODO, only log the template name and uuid.
return nil
func (writer *defaultWriter) LogTelemetry(ctx context.Context) {
// We don't log template enum args in the default template writer.
// This is because this writer can be used for customer templates, and we
// don't want to log PII.
event := telemetry.DatabricksCliLog{
BundleInitEvent: &protos.BundleInitEvent{
Uuid: bundleUuid,
TemplateName: string(writer.templateName),
},
}
telemetry.Log(ctx, event)
}
type writerWithFullTelemetry struct {
defaultWriter
}
func (tmpl *writerWithFullTelemetry) LogTelemetry(ctx context.Context) error {
// TODO, log template name, uuid and enum args as well.
return nil
func (writer *writerWithFullTelemetry) LogTelemetry(ctx context.Context) {
templateEnumArgs := []protos.BundleInitTemplateEnumArg{}
for k, v := range writer.config.enumValues() {
templateEnumArgs = append(templateEnumArgs, protos.BundleInitTemplateEnumArg{
Key: k,
Value: v,
})
}
event := telemetry.DatabricksCliLog{
BundleInitEvent: &protos.BundleInitEvent{
Uuid: bundleUuid,
TemplateName: string(writer.templateName),
TemplateEnumArgs: templateEnumArgs,
},
}
telemetry.Log(ctx, event)
}

View File

@ -8,6 +8,9 @@ import (
"github.com/databricks/cli/cmd/root"
"github.com/databricks/cli/libs/dbr"
"github.com/databricks/cli/libs/filer"
"github.com/databricks/cli/libs/jsonschema"
"github.com/databricks/cli/libs/telemetry"
"github.com/databricks/cli/libs/telemetry/protos"
"github.com/databricks/databricks-sdk-go"
workspaceConfig "github.com/databricks/databricks-sdk-go/config"
"github.com/stretchr/testify/assert"
@ -56,3 +59,56 @@ func TestMaterializeForNonTemplateDirectory(t *testing.T) {
err = w.Materialize(ctx, &localReader{path: tmpDir2})
assert.EqualError(t, err, "not a bundle template: expected to find a template schema file at databricks_template_schema.json")
}
func TestDefaultWriterLogTelemetry(t *testing.T) {
ctx := telemetry.WithMockLogger(context.Background())
w := &defaultWriter{templateName: Custom}
w.LogTelemetry(ctx)
logs := telemetry.Introspect(ctx)
assert.Len(t, logs, 1)
assert.Equal(t, &protos.BundleInitEvent{
TemplateName: string(Custom),
Uuid: bundleUuid,
}, logs[0].BundleInitEvent)
}
func TestWriterWithFullTelemetry(t *testing.T) {
ctx := telemetry.WithMockLogger(context.Background())
w := &writerWithFullTelemetry{
defaultWriter: defaultWriter{
templateName: DefaultPython,
config: &config{
values: map[string]any{
"foo": "v1",
"bar": "v2",
},
schema: &jsonschema.Schema{
Properties: map[string]*jsonschema.Schema{
"foo": {
Type: jsonschema.StringType,
Enum: []any{"v1", "v2"},
},
"bar": {
Type: jsonschema.StringType,
},
},
},
},
},
}
w.LogTelemetry(ctx)
logs := telemetry.Introspect(ctx)
assert.Len(t, logs, 1)
assert.Equal(t, &protos.BundleInitEvent{
TemplateName: string(DefaultPython),
TemplateEnumArgs: []protos.BundleInitTemplateEnumArg{
{
Key: "foo",
Value: "v1",
},
},
Uuid: bundleUuid,
}, logs[0].BundleInitEvent)
}