databricks-cli/cmd/bundle/init.go

package bundle

import (
	"errors"
	"fmt"

	"github.com/databricks/cli/cmd/root"
	"github.com/databricks/cli/libs/cmdio"
	"github.com/databricks/cli/libs/log"
	"github.com/databricks/cli/libs/telemetry"
	"github.com/databricks/cli/libs/template"
	"github.com/databricks/databricks-sdk-go/client"
	"github.com/spf13/cobra"
)
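
// newInitCommand constructs the `bundle init` command. Typical invocations
// (the built-in template names are listed at runtime by template.HelpDescriptions):
//
//	databricks bundle init                                    # interactive template picker
//	databricks bundle init default-python                     # a built-in template
//	databricks bundle init https://github.com/my/repository   # a Git repository URL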
func newInitCommand() *cobra.Command {
	cmd := &cobra.Command{
		Use:   "init [TEMPLATE_PATH]",
		Short: "Initialize using a bundle template",
		Args:  root.MaximumNArgs(1),
		Long: fmt.Sprintf(`Initialize using a bundle template.

TEMPLATE_PATH optionally specifies which template to use. It can be one of the following:
%s
- a local file system path with a template directory
- a Git repository URL, e.g. https://github.com/my/repository

See https://docs.databricks.com/en/dev-tools/bundles/templates.html for more information on templates.`, template.HelpDescriptions()),
	}
	var configFile string
	var outputDir string
	var templateDir string
	var tag string
	var branch string
	cmd.Flags().StringVar(&configFile, "config-file", "", "JSON file containing key value pairs of input parameters required for template initialization.")
	cmd.Flags().StringVar(&templateDir, "template-dir", "", "Directory path within a Git repository containing the template.")
	cmd.Flags().StringVar(&outputDir, "output-dir", "", "Directory to write the initialized template to.")
	cmd.Flags().StringVar(&tag, "tag", "", "Git tag to use for template initialization")
	cmd.Flags().StringVar(&branch, "branch", "", "Git branch to use for template initialization")
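
	// Telemetry lifecycle: PreRunE installs the telemetry logger on the
	// command context, RunE records events against it, and PostRun flushes
	// them to Databricks once the command has finished.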
	cmd.PreRunE = func(cmd *cobra.Command, args []string) error {
		// Configure the logger to send telemetry to Databricks.
		ctx := telemetry.WithDefaultLogger(cmd.Context())
		cmd.SetContext(ctx)

		return root.MustWorkspaceClient(cmd, args)
	}
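
	// Flushing telemetry is best effort: if the API client cannot be
	// constructed, the failure is logged at debug level and the command
	// still succeeds.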
	cmd.PostRun = func(cmd *cobra.Command, args []string) {
		ctx := cmd.Context()
		w := root.WorkspaceClient(ctx)
		apiClient, err := client.New(w.Config)
		if err != nil {
			// Uploading telemetry is best effort. Do not error.
			log.Debugf(ctx, "Could not create API client to send telemetry: %v", err)
			return
		}

		telemetry.Flush(cmd.Context(), apiClient)
	}

	cmd.RunE = func(cmd *cobra.Command, args []string) error {
		var templatePathOrUrl string
		if len(args) > 0 {
			templatePathOrUrl = args[0]
		}
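
		// The resolver picks the template Reader (built into the CLI, from a
		// Git repository, or from a local path) and the Writer to use, based
		// on the provided path or URL and the command's flags.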
		r := template.Resolver{
			TemplatePathOrUrl: templatePathOrUrl,
			ConfigFile:        configFile,
			OutputDir:         outputDir,
			TemplateDir:       templateDir,
			Tag:               tag,
			Branch:            branch,
		}

		ctx := cmd.Context()
		tmpl, err := r.Resolve(ctx)
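
		// Resolve returns template.ErrCustomSelected when the user chooses the
		// custom-template option from the interactive picker without giving a
		// path or URL; print guidance instead of failing.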
		if errors.Is(err, template.ErrCustomSelected) {
			cmdio.LogString(ctx, "Please specify a path or Git repository to use a custom template.")
			cmdio.LogString(ctx, "See https://docs.databricks.com/en/dev-tools/bundles/templates.html to learn more about custom templates.")
			return nil
		}
		if err != nil {
			return err
		}
		defer tmpl.Reader.Cleanup(ctx)
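
		// Materialize writes the template project to disk, prompting the user
		// for any input parameters that were not supplied via --config-file.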
		err = tmpl.Writer.Materialize(ctx, tmpl.Reader)
		if err != nil {
			return err
		}
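
		// Record telemetry for the initialized template. Databricks-owned
		// templates log their name and arguments; custom templates are logged
		// only under a generic "custom" placeholder so that no user data is
		// captured.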
		tmpl.Writer.LogTelemetry(ctx)
		return nil
	}

	return cmd
}