more progress on cleaning up the APIs

This commit is contained in:
Shreyas Goenka 2025-01-03 16:13:53 +05:30
parent ce0667219a
commit ea0bc1705c
No known key found for this signature in database
GPG Key ID: 92A07DF49CCB0622
10 changed files with 543 additions and 131 deletions

View File

@ -4,8 +4,6 @@ import (
"context"
"errors"
"fmt"
"io/fs"
"os"
"path/filepath"
"slices"
"strings"
@ -14,7 +12,6 @@ import (
"github.com/databricks/cli/libs/cmdio"
"github.com/databricks/cli/libs/dbr"
"github.com/databricks/cli/libs/filer"
"github.com/databricks/cli/libs/git"
"github.com/databricks/cli/libs/log"
"github.com/databricks/cli/libs/telemetry"
"github.com/databricks/cli/libs/template"
@ -116,24 +113,6 @@ func getNativeTemplateByName(name string) *nativeTemplate {
return nil
}
func getFsForNativeTemplate(name string) (fs.FS, error) {
builtin, err := template.Builtin()
if err != nil {
return nil, err
}
// If this is a built-in template, the return value will be non-nil.
var templateFS fs.FS
for _, entry := range builtin {
if entry.Name == name {
templateFS = entry.FS
break
}
}
return templateFS, nil
}
func isRepoUrl(url string) bool {
result := false
for _, prefix := range gitUrlPrefixes {
@ -185,7 +164,7 @@ TEMPLATE_PATH optionally specifies which template to use. It can be one of the f
- a local file system path with a template directory
- a Git repository URL, e.g. https://github.com/my/repository
See https://docs.databricks.com/en/dev-tools/bundles/templates.html for more information on templates.`, nativeTemplateHelpDescriptions()),
See https://docs.databricks.com/en/dev-tools/bundles/templates.html for more information on templates.`, template.HelpDescriptions()),
}
var configFile string
@ -231,112 +210,51 @@ See https://docs.databricks.com/en/dev-tools/bundles/templates.html for more inf
ref = tag
}
var tmpl *template.Template
var err error
ctx := cmd.Context()
var templatePath string
if len(args) > 0 {
templatePath = args[0]
} else {
var err error
if !cmdio.IsPromptSupported(ctx) {
return errors.New("please specify a template")
// User already specified a template local path or a Git URL. Use that
// information to configure a reader for the template
tmpl = template.Get(template.Custom)
// TODO: Get rid of the name arg.
if template.IsGitRepoUrl(args[0]) {
tmpl.SetReader(template.NewGitReader("", args[0], ref, templateDir))
} else {
tmpl.SetReader(template.NewLocalReader("", args[0]))
}
} else {
tmplId, err := template.PromptForTemplateId(cmd.Context(), ref, templateDir)
if tmplId == template.Custom {
// If a user selects custom during the prompt, ask them to provide a path or Git URL
// as a positional argument.
cmdio.LogString(ctx, "Please specify a path or Git repository to use a custom template.")
cmdio.LogString(ctx, "See https://docs.databricks.com/en/dev-tools/bundles/templates.html to learn more about custom templates.")
return nil
}
description, err := cmdio.SelectOrdered(ctx, nativeTemplateOptions(), "Template to use")
if err != nil {
return err
}
templatePath = getNativeTemplateByDescription(description)
tmpl = template.Get(tmplId)
}
defer tmpl.Reader.Close()
outputFiler, err := constructOutputFiler(ctx, outputDir)
if err != nil {
return err
}
if templatePath == customTemplate {
cmdio.LogString(ctx, "Please specify a path or Git repository to use a custom template.")
cmdio.LogString(ctx, "See https://docs.databricks.com/en/dev-tools/bundles/templates.html to learn more about custom templates.")
return nil
}
tmpl.Writer.Initialize(tmpl.Reader, configFile, outputFiler)
nt := getNativeTemplateByName(templatePath)
var templateName string
isTemplateDatabricksOwned := false
if nt != nil {
// If the template is a native template, templatePath is the name of the template.
// Eg: templatePath = "default-python".
templateName = templatePath
// if we have a Git URL for the native template, expand templatePath
// to the full URL.
if nt.gitUrl != "" {
templatePath = nt.gitUrl
}
isTemplateDatabricksOwned = true
}
if !isRepoUrl(templatePath) {
if templateDir != "" {
return errors.New("--template-dir can only be used with a Git repository URL")
}
templateFS, err := getFsForNativeTemplate(templatePath)
if err != nil {
return err
}
// If this is not a built-in template, then it must be a local file system path.
if templateFS == nil {
templateFS = os.DirFS(templatePath)
}
t := template.Template{
TemplateOpts: template.TemplateOpts{
ConfigFilePath: configFile,
TemplateFS: templateFS,
OutputFiler: outputFiler,
IsDatabricksOwned: isTemplateDatabricksOwned,
Name: templateName,
},
}
// skip downloading the repo because input arg is not a URL. We assume
// it's a path on the local file system in that case
return t.Materialize(ctx)
}
// Create a temporary directory with the name of the repository. The '*'
// character is replaced by a random string in the generated temporary directory.
repoDir, err := os.MkdirTemp("", repoName(templatePath)+"-*")
err = tmpl.Writer.Materialize(ctx)
if err != nil {
return err
}
// start the spinner
promptSpinner := cmdio.Spinner(ctx)
promptSpinner <- "Downloading the template\n"
// TODO: Add automated test that the downloaded git repo is cleaned up.
// Clone the repository in the temporary directory
err = git.Clone(ctx, templatePath, ref, repoDir)
close(promptSpinner)
if err != nil {
return err
}
// Clean up downloaded repository once the template is materialized.
defer os.RemoveAll(repoDir)
templateFS := os.DirFS(filepath.Join(repoDir, templateDir))
t := template.Template{
TemplateOpts: template.TemplateOpts{
ConfigFilePath: configFile,
TemplateFS: templateFS,
OutputFiler: outputFiler,
IsDatabricksOwned: isTemplateDatabricksOwned,
Name: templateName,
},
}
return t.Materialize(ctx)
return tmpl.Writer.LogTelemetry(ctx)
}
return cmd
}

View File

@ -38,14 +38,6 @@ func TestNativeTemplateOptions(t *testing.T) {
assert.Equal(t, expected, nativeTemplateOptions())
}
func TestNativeTemplateHelpDescriptions(t *testing.T) {
expected := `- default-python: The default Python template for Notebooks / Delta Live Tables / Workflows
- default-sql: The default SQL template for .sql files that run with Databricks SQL
- dbt-sql: The dbt SQL template (databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks)
- mlops-stacks: The Databricks MLOps Stacks template (github.com/databricks/mlops-stacks)`
assert.Equal(t, expected, nativeTemplateHelpDescriptions())
}
func TestGetNativeTemplateByName(t *testing.T) {
assert.Equal(t, "https://github.com/databricks/mlops-stacks", getNativeTemplateByName("mlops-stacks").gitUrl)
assert.Equal(t, "https://github.com/databricks/mlops-stacks", getNativeTemplateByName("mlops-stack").gitUrl)

View File

@ -44,7 +44,7 @@ func initTestTemplateWithBundleRoot(t testutil.TestingT, ctx context.Context, te
out, err := filer.NewLocalClient(bundleRoot)
require.NoError(t, err)
tmpl := template.Template{
tmpl := template.TemplateX{
TemplateOpts: template.TemplateOpts{
ConfigFilePath: configFilePath,
TemplateFS: os.DirFS(templateRoot),

View File

@ -15,6 +15,7 @@ type BuiltinTemplate struct {
}
// Builtin returns the list of all built-in templates.
// TODO: Make private?
func Builtin() ([]BuiltinTemplate, error) {
templates, err := fs.Sub(builtinTemplates, "templates")
if err != nil {

View File

@ -12,12 +12,6 @@ import (
"github.com/databricks/cli/libs/telemetry/events"
)
const (
libraryDirName = "library"
templateDirName = "template"
schemaFileName = "databricks_template_schema.json"
)
type TemplateOpts struct {
// file path containing user defined config values
ConfigFilePath string
@ -32,7 +26,7 @@ type TemplateOpts struct {
Name string
}
type Template struct {
type TemplateX struct {
TemplateOpts
// internal object used to prompt user for config values and store them.
@ -47,7 +41,7 @@ type Template struct {
// has provided a config file path.
// 2. For any values that are required by the template but not provided in the config
// file, this function prompts the user for them.
func (t *Template) resolveTemplateInput(ctx context.Context) error {
func (t *TemplateX) resolveTemplateInput(ctx context.Context) error {
if _, err := fs.Stat(t.TemplateFS, schemaFileName); errors.Is(err, fs.ErrNotExist) {
return fmt.Errorf("not a bundle template: expected to find a template schema file at %s", schemaFileName)
}
@ -91,7 +85,7 @@ func (t *Template) resolveTemplateInput(ctx context.Context) error {
return t.config.validate()
}
func (t *Template) printSuccessMessage(ctx context.Context) error {
func (t *TemplateX) printSuccessMessage(ctx context.Context) error {
success := t.config.schema.SuccessMessage
if success == "" {
cmdio.LogString(ctx, "✨ Successfully initialized template")
@ -106,7 +100,7 @@ func (t *Template) printSuccessMessage(ctx context.Context) error {
return nil
}
func (t *Template) logTelemetry(ctx context.Context) error {
func (t *TemplateX) logTelemetry(ctx context.Context) error {
// Only log telemetry input for Databricks owned templates. This is to prevent
// accidentally collecting PII from custom user templates.
templateEnumArgs := []events.BundleInitTemplateEnumArg{}
@ -135,7 +129,7 @@ func (t *Template) logTelemetry(ctx context.Context) error {
// This function materializes the input templates as a project, using user defined
// configurations.
func (t *Template) Materialize(ctx context.Context) error {
func (t *TemplateX) Materialize(ctx context.Context) error {
err := t.resolveTemplateInput(ctx)
if err != nil {
return err

View File

@ -18,7 +18,7 @@ func TestMaterializeForNonTemplateDirectory(t *testing.T) {
require.NoError(t, err)
ctx := root.SetWorkspaceClient(context.Background(), w)
tmpl := Template{
tmpl := TemplateX{
TemplateOpts: TemplateOpts{
ConfigFilePath: "",
TemplateFS: os.DirFS(tmpDir),

199
libs/template/reader.go Normal file
View File

@ -0,0 +1,199 @@
package template
import (
"context"
"fmt"
"io/fs"
"os"
"path/filepath"
"strings"
"github.com/databricks/cli/libs/cmdio"
"github.com/databricks/cli/libs/git"
)
// Reader is a source of template definition files. Implementations in this
// file read from the built-in templates, a Git repository, or a local
// directory.
//
// TODO: Add tests for all these readers.
type Reader interface {
// FS returns a file system that contains the template
// definition files. This function is NOT thread safe.
FS(ctx context.Context) (fs.FS, error)
// Close releases any resources associated with the reader
// like cleaning up temporary directories.
Close() error
// Name returns the name the reader was configured with.
Name() string
}
// builtinReader is a Reader for one of the templates returned by Builtin.
type builtinReader struct {
// name is compared against the Name of each built-in entry in FS.
name string
// fsCached memoizes the file system found by the first call to FS.
fsCached fs.FS
}
// FS returns the file system of the built-in template named r.name.
//
// The result is memoized in r.fsCached, so the list of built-in templates is
// only scanned until the first successful lookup. An error is returned when
// the built-in templates cannot be loaded, or when none of them is named
// r.name. The latter case used to yield a nil file system together with a nil
// error, which postponed the failure to whichever caller first used the
// result.
func (r *builtinReader) FS(ctx context.Context) (fs.FS, error) {
	// If the FS has already been loaded, return it.
	if r.fsCached != nil {
		return r.fsCached, nil
	}

	builtin, err := Builtin()
	if err != nil {
		return nil, err
	}

	for _, entry := range builtin {
		if entry.Name == r.name {
			r.fsCached = entry.FS
			return r.fsCached, nil
		}
	}

	return nil, fmt.Errorf("builtin template %q not found", r.name)
}
// Close is a no-op and always returns nil: this reader creates nothing that
// needs to be released.
func (*builtinReader) Close() error {
	return nil
}

// Name returns the name the reader looks up among the built-in templates.
func (r *builtinReader) Name() string {
	return r.name
}
// gitReader is a Reader that clones a Git repository into a temporary
// directory and serves the template from a subdirectory of that clone.
type gitReader struct {
// name is the value returned by Name.
name string
// URL of the git repository that contains the template
gitUrl string
// tag or branch to checkout
ref string
// subdirectory within the repository that contains the template
templateDir string
// temporary directory where the repository is cloned
// (set by FS, removed by Close)
tmpRepoDir string
// fsCached memoizes the file system produced by the first successful FS call.
fsCached fs.FS
}
// repoName derives a repository name from its URL: trailing '/' characters
// are dropped and whatever follows the last remaining '/' is returned. A URL
// without any '/' is returned as is (minus trailing slashes).
// For example, git@github.com:databricks/cli.git yields "cli.git".
func repoName(url string) string {
	trimmed := strings.TrimRight(url, "/")
	if idx := strings.LastIndex(trimmed, "/"); idx >= 0 {
		return trimmed[idx+1:]
	}
	return trimmed
}
// gitUrlPrefixes lists the prefixes that mark a string as a Git repository
// URL for IsGitRepoUrl.
var gitUrlPrefixes = []string{
	"https://",
	"git@",
}

// IsGitRepoUrl reports whether url starts with one of gitUrlPrefixes.
//
// TODO: Copy over tests for this function.
func IsGitRepoUrl(url string) bool {
	for i := range gitUrlPrefixes {
		if strings.HasPrefix(url, gitUrlPrefixes[i]) {
			return true
		}
	}
	return false
}
// NewGitReader returns a Reader that serves the template found in the
// templateDir subdirectory of the repository at gitUrl, checked out at ref.
// Nothing is cloned until FS is called on the result.
//
// TODO: Can I remove the name from here and other readers?
func NewGitReader(name, gitUrl, ref, templateDir string) Reader {
	r := new(gitReader)
	r.name = name
	r.gitUrl = gitUrl
	r.ref = ref
	r.templateDir = templateDir
	return r
}
// FS clones the repository into a fresh temporary directory and returns a
// file system rooted at r.templateDir inside the clone. After a successful
// call the result is served from r.fsCached and nothing is cloned again.
//
// The temporary directory is recorded in r.tmpRepoDir before cloning so that
// Close can remove it even when the clone fails. The receiver is mutated
// without any synchronization.
//
// TODO: Test the idempotency of this function as well.
func (r *gitReader) FS(ctx context.Context) (fs.FS, error) {
	if cached := r.fsCached; cached != nil {
		return cached, nil
	}

	// The '*' in the pattern is replaced by a random string, so the directory
	// name starts with the repository name.
	cloneDir, err := os.MkdirTemp("", repoName(r.gitUrl)+"-*")
	if err != nil {
		return nil, err
	}
	r.tmpRepoDir = cloneDir

	// Show a spinner for the duration of the clone.
	spinner := cmdio.Spinner(ctx)
	spinner <- "Downloading the template\n"
	cloneErr := git.Clone(ctx, r.gitUrl, r.ref, cloneDir)
	close(spinner)
	if cloneErr != nil {
		return nil, cloneErr
	}

	r.fsCached = os.DirFS(filepath.Join(cloneDir, r.templateDir))
	return r.fsCached, nil
}
// Close removes the temporary clone directory recorded by FS. It returns nil
// without touching the file system when no directory has been recorded.
func (r *gitReader) Close() error {
	if r.tmpRepoDir != "" {
		return os.RemoveAll(r.tmpRepoDir)
	}
	return nil
}

// Name returns the name the reader was constructed with.
func (r *gitReader) Name() string {
	return r.name
}
// localReader is a Reader that serves a template from a directory on the
// local file system.
type localReader struct {
// name is the value returned by Name.
name string
// Path on the local filesystem that contains the template
path string
// fsCached memoizes the os.DirFS created by the first call to FS.
fsCached fs.FS
}
// NewLocalReader returns a Reader that serves the template stored in the
// local directory at path.
func NewLocalReader(name, path string) Reader {
	r := new(localReader)
	r.name = name
	r.path = path
	return r
}
// FS returns an os.DirFS rooted at r.path, creating it on the first call and
// reusing it afterwards. The returned error is always nil.
func (r *localReader) FS(ctx context.Context) (fs.FS, error) {
	if r.fsCached == nil {
		r.fsCached = os.DirFS(r.path)
	}
	return r.fsCached, nil
}
// Close is a no-op and always returns nil: this reader creates nothing that
// needs to be released.
func (*localReader) Close() error {
	return nil
}

// Name returns the name the reader was constructed with.
func (r *localReader) Name() string {
	return r.name
}
// failReader is a placeholder Reader whose FS and Close always fail. It marks
// a template whose real reader still has to be configured.
type failReader struct{}

// fail builds the error shared by FS and Close.
func (r *failReader) fail() error {
	return fmt.Errorf("this is a placeholder reader that always fails. Please configure a real reader.")
}

// FS always returns a nil file system and an error.
func (r *failReader) FS(ctx context.Context) (fs.FS, error) {
	return nil, r.fail()
}

// Close always returns an error.
func (r *failReader) Close() error {
	return r.fail()
}

// Name returns the fixed string "failReader".
func (r *failReader) Name() string {
	return "failReader"
}

145
libs/template/template.go Normal file
View File

@ -0,0 +1,145 @@
package template
import (
"context"
"fmt"
"strings"
"github.com/databricks/cli/libs/cmdio"
"github.com/databricks/cli/libs/filer"
)
// Template describes one entry of the template registry (allTemplates): where
// its files come from, how it is written out, and how it is presented.
type Template struct {
// Reader supplies the template definition files.
Reader Reader
// Writer materializes the template and logs telemetry for it.
Writer Writer
// Id identifies the template; Get looks templates up by this value.
Id TemplateId
// Description is the text shown in HelpDescriptions and in the selection
// prompt; PromptForTemplateId maps the selected value back through it.
Description string
// Aliases lists alternative names for the template.
// NOTE(review): not read anywhere in this file — confirm where aliases are resolved.
Aliases []string
// Hidden templates are left out of HelpDescriptions and of the prompt options.
Hidden bool
}
// NativeTemplate holds descriptive metadata for a template.
// NOTE(review): nothing in this file references this type; the field notes
// below are inferred from the field names only — confirm against callers.
//
// TODO: Make details private?
// TODO: Combine this with the generic template struct?
type NativeTemplate struct {
// presumably the template's name
Name string
// presumably a human-readable description
Description string
// presumably alternative names for the template
Aliases []string
// presumably the Git URL the template is fetched from, if any
GitUrl string
// presumably excludes the template from listings
Hidden bool
// presumably marks templates maintained by Databricks
IsOwnedByDatabricks bool
}
// TemplateId is the identifier of a template in the registry.
type TemplateId string
// Identifiers of the templates registered in allTemplates.
const (
DefaultPython TemplateId = "default-python"
DefaultSql TemplateId = "default-sql"
DbtSql TemplateId = "dbt-sql"
MlopsStacks TemplateId = "mlops-stacks"
DefaultPydabs TemplateId = "default-pydabs"
// Custom is the "bring your own template" entry; it is registered with a
// placeholder reader that always fails.
Custom TemplateId = "custom"
)
// allTemplates is the registry of templates known to this package. Get looks
// entries up by Id, while HelpDescriptions and options list them in the order
// given here.
var allTemplates = []Template{
{
Id: DefaultPython,
Description: "The default Python template for Notebooks / Delta Live Tables / Workflows",
Reader: &builtinReader{name: "default-python"},
Writer: &writerWithTelemetry{},
},
{
Id: DefaultSql,
Description: "The default SQL template for .sql files that run with Databricks SQL",
Reader: &builtinReader{name: "default-sql"},
Writer: &writerWithTelemetry{},
},
{
Id: DbtSql,
Description: "The dbt SQL template (databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks)",
Reader: &builtinReader{name: "dbt-sql"},
Writer: &writerWithTelemetry{},
},
{
Id: MlopsStacks,
Description: "The Databricks MLOps Stacks template (github.com/databricks/mlops-stacks)",
Aliases: []string{"mlops-stack"},
Reader: &gitReader{gitUrl: "https://github.com/databricks/mlops-stacks"},
Writer: &writerWithTelemetry{},
},
{
Id: DefaultPydabs,
Hidden: true,
Description: "The default PyDABs template",
Reader: &gitReader{gitUrl: "https://databricks.github.io/workflows-authoring-toolkit/pydabs-template.git"},
Writer: &writerWithTelemetry{},
},
// The custom entry carries a placeholder reader that always fails; a real
// reader has to be installed with SetReader before the template is used.
{
Id: Custom,
Description: "Bring your own template",
Reader: &failReader{},
Writer: &defaultWriter{},
},
}
// HelpDescriptions returns one "- <id>: <description>" line per registered
// template, joined with newlines. Hidden templates and the Custom entry are
// left out.
func HelpDescriptions() string {
	var lines []string
	for i := range allTemplates {
		t := &allTemplates[i]
		if t.Hidden || t.Id == Custom {
			continue
		}
		lines = append(lines, fmt.Sprintf("- %s: %s", t.Id, t.Description))
	}
	return strings.Join(lines, "\n")
}
// options builds the choices offered by the template selection prompt: one
// tuple per non-hidden template, with the template id as the tuple's Name and
// the template description as the tuple's Id.
func options() []cmdio.Tuple {
	tuples := make([]cmdio.Tuple, 0, len(allTemplates))
	for i := range allTemplates {
		if t := &allTemplates[i]; !t.Hidden {
			tuples = append(tuples, cmdio.Tuple{
				Name: string(t.Id),
				Id:   t.Description,
			})
		}
	}
	return tuples
}
// PromptForTemplateId asks the user to pick one of the non-hidden templates
// and returns the id of the template whose description matches the selection.
//
// It returns an error when prompting is not supported in ctx or when the
// selection itself fails. It panics if the selected value matches no
// registered template, which would mean the prompt options and the registry
// disagree. The ref and templateDir parameters are currently unused.
//
// TODO CONTINUE defining the methods that the init command will finally rely on.
func PromptForTemplateId(ctx context.Context, ref, templateDir string) (TemplateId, error) {
	if !cmdio.IsPromptSupported(ctx) {
		return "", fmt.Errorf("please specify a template")
	}

	selected, err := cmdio.SelectOrdered(ctx, options(), "Template to use")
	if err != nil {
		return "", err
	}

	for i := range allTemplates {
		if allTemplates[i].Description == selected {
			return allTemplates[i].Id, nil
		}
	}

	panic("this should never happen - template not found")
}
// InitializeWriter hands the template's reader, together with configPath and
// outputFiler, to the template's writer.
func (t *Template) InitializeWriter(configPath string, outputFiler filer.Filer) {
	t.Writer.Initialize(t.Reader, configPath, outputFiler)
}

// SetReader replaces the template's reader with r.
func (t *Template) SetReader(r Reader) {
	t.Reader = r
}
// Get returns the registered template with the given id, or nil when there is
// none. The result points at a copy of the registry entry, so replacing its
// fields (for example through SetReader) leaves allTemplates untouched; the
// Reader and Writer values themselves are copied as-is.
func Get(id TemplateId) *Template {
	for i := range allTemplates {
		if allTemplates[i].Id != id {
			continue
		}
		found := allTemplates[i]
		return &found
	}
	return nil
}

View File

@ -0,0 +1,15 @@
package template
import (
"testing"
"github.com/stretchr/testify/assert"
)
// TestTemplateHelpDescriptions pins the exact text of HelpDescriptions: one
// line for each of default-python, default-sql, dbt-sql and mlops-stacks, in
// that order.
func TestTemplateHelpDescriptions(t *testing.T) {
expected := `- default-python: The default Python template for Notebooks / Delta Live Tables / Workflows
- default-sql: The default SQL template for .sql files that run with Databricks SQL
- dbt-sql: The dbt SQL template (databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks)
- mlops-stacks: The Databricks MLOps Stacks template (github.com/databricks/mlops-stacks)`
assert.Equal(t, expected, HelpDescriptions())
}

148
libs/template/writer.go Normal file
View File

@ -0,0 +1,148 @@
package template
import (
"context"
"errors"
"fmt"
"io/fs"
"github.com/databricks/cli/libs/cmdio"
"github.com/databricks/cli/libs/filer"
)
// TODO: Add tests for these writers, mocking the cmdio library
// at the same time.
// Well-known names inside a template's file system.
const (
// libraryDirName is passed to newRenderer as its last argument.
// NOTE(review): presumably the directory holding shared template
// definitions — confirm against newRenderer.
libraryDirName = "library"
// templateDirName is passed to newRenderer ahead of libraryDirName.
// NOTE(review): presumably the directory holding the files to render —
// confirm against newRenderer.
templateDirName = "template"
// schemaFileName is the schema file that must exist at the root of the
// template file system; it is also the path given to newConfig.
schemaFileName = "databricks_template_schema.json"
)
// Writer turns a template provided by a Reader into output files.
type Writer interface {
// Initialize supplies the template reader, the path of the config file
// with user-provided values (may be empty) and the filer that receives
// the output.
Initialize(reader Reader, configPath string, outputFiler filer.Filer)
// Materialize resolves the template input and writes the rendered
// template through the output filer.
Materialize(ctx context.Context) error
// LogTelemetry logs telemetry for the template; implementations may
// make this a no-op.
LogTelemetry(ctx context.Context) error
}
// defaultWriter is the Writer implementation that renders a template and
// persists it, without logging any telemetry.
type defaultWriter struct {
// reader provides the template file system read in promptForInput.
reader Reader
// configPath is the file to read config values from; empty means no file.
configPath string
// outputFiler receives the rendered files in Materialize.
outputFiler filer.Filer
// Internal state
// config and renderer are created by promptForInput.
config *config
renderer *renderer
}
// Initialize stores the template reader, the path of the config file with
// user-provided values (empty for none) and the filer that receives the
// output.
//
// The reader has to be stored here: promptForInput reads tmpl.reader, and
// writers in the template registry are created without one, so dropping the
// argument would leave it nil when Materialize runs.
func (tmpl *defaultWriter) Initialize(reader Reader, configPath string, outputFiler filer.Filer) {
	tmpl.reader = reader
	tmpl.configPath = configPath
	tmpl.outputFiler = outputFiler
}
// promptForInput resolves the template's configuration and prepares the
// renderer. In order, it:
//  1. obtains the template file system from the reader and rejects it when
//     the schema file does not exist,
//  2. loads the config from the schema and, if a config file path is set,
//     assigns values from that file,
//  3. creates the renderer and logs the rendered welcome message, if any,
//  4. prompts for (or defaults) the remaining values and validates the
//     result.
//
// On success tmpl.config and tmpl.renderer are populated.
func (tmpl *defaultWriter) promptForInput(ctx context.Context) error {
	templateFS, err := tmpl.reader.FS(ctx)
	if err != nil {
		return err
	}

	// Only a missing schema file is reported here; other stat errors fall
	// through to newConfig.
	_, statErr := fs.Stat(templateFS, schemaFileName)
	if errors.Is(statErr, fs.ErrNotExist) {
		return fmt.Errorf("not a bundle template: expected to find a template schema file at %s", schemaFileName)
	}

	tmpl.config, err = newConfig(ctx, templateFS, schemaFileName)
	if err != nil {
		return err
	}

	// Values from the config file take effect before any prompting.
	if tmpl.configPath != "" {
		if err := tmpl.config.assignValuesFromFile(tmpl.configPath); err != nil {
			return err
		}
	}

	tmplHelpers := loadHelpers(ctx)
	tmpl.renderer, err = newRenderer(ctx, tmpl.config.values, tmplHelpers, templateFS, templateDirName, libraryDirName)
	if err != nil {
		return err
	}

	// The welcome message is itself a template and is rendered before being
	// logged.
	if msg := tmpl.config.schema.WelcomeMessage; msg != "" {
		rendered, err := tmpl.renderer.executeTemplate(msg)
		if err != nil {
			return err
		}
		cmdio.LogString(ctx, rendered)
	}

	// Prompt user for any missing config values. Assign default values if
	// terminal is not TTY
	if err := tmpl.config.promptOrAssignDefaultValues(tmpl.renderer); err != nil {
		return err
	}
	return tmpl.config.validate()
}
// printSuccessMessage logs the schema's success message after rendering it
// with the renderer. When the schema defines no success message, a fixed
// default line is logged instead.
func (tmpl *defaultWriter) printSuccessMessage(ctx context.Context) error {
	msg := tmpl.config.schema.SuccessMessage
	if msg != "" {
		rendered, err := tmpl.renderer.executeTemplate(msg)
		if err != nil {
			return err
		}
		cmdio.LogString(ctx, rendered)
		return nil
	}

	cmdio.LogString(ctx, "✨ Successfully initialized template")
	return nil
}
// Materialize writes the template out as a project. It resolves the template
// input, walks the template tree to compute the output files in memory,
// flushes them through the output filer and finally prints the success
// message. The first failing step aborts the run and its error is returned.
func (tmpl *defaultWriter) Materialize(ctx context.Context) error {
	if err := tmpl.promptForInput(ctx); err != nil {
		return err
	}

	// Compute in-memory representations of the output files.
	if err := tmpl.renderer.walk(); err != nil {
		return err
	}

	// Flush the output files to disk.
	if err := tmpl.renderer.persistToDisk(ctx, tmpl.outputFiler); err != nil {
		return err
	}

	return tmpl.printSuccessMessage(ctx)
}
// LogTelemetry does nothing for the default writer and always returns nil.
func (*defaultWriter) LogTelemetry(context.Context) error {
	return nil
}
// writerWithTelemetry is a defaultWriter with its own LogTelemetry method.
type writerWithTelemetry struct {
	defaultWriter
}

// LogTelemetry currently returns nil without logging anything.
//
// TODO: Log telemetry.
func (*writerWithTelemetry) LogTelemetry(context.Context) error {
	return nil
}
// NewWriterWithTelemetry returns a telemetry-enabled Writer that is already
// set up with reader, configPath and outputFiler.
func NewWriterWithTelemetry(reader Reader, configPath string, outputFiler filer.Filer) Writer {
	w := &writerWithTelemetry{}
	w.reader = reader
	w.configPath = configPath
	w.outputFiler = outputFiler
	return w
}