From b2b541b1bacb271b96fa88d76641d98d2cf4eac6 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Sun, 9 Mar 2025 21:17:56 +0100 Subject: [PATCH] Add an experimental DLT CLI --- .gitignore | 2 + Makefile | 8 +- .../config/mutator/default_workspace_root.go | 15 ++- bundle/config/mutator/process_target_mode.go | 2 +- .../config/mutator/select_default_target.go | 13 +++ bundle/config/root.go | 68 ++++++++++--- bundle/config/target.go | 10 ++ bundle/deploy/files/upload.go | 10 +- bundle/deploy/state.go | 2 +- bundle/deploy/state_pull.go | 2 +- bundle/deploy/state_pull_test.go | 6 +- bundle/deploy/state_push.go | 2 +- bundle/deploy/state_push_test.go | 2 +- bundle/deploy/state_update.go | 4 +- bundle/deploy/state_update_test.go | 2 +- bundle/deploy/terraform/state_push.go | 10 +- bundle/deploy/terraform/state_push_test.go | 2 + bundle/permissions/set_owner_permissions.go | 23 +++++ bundle/phases/bind.go | 9 +- bundle/phases/deploy.go | 17 ++-- bundle/phases/initialize.go | 1 + bundle/run/options.go | 13 ++- clis/cli_types.go | 15 +++ clis/dab/dab.go | 43 +++++++++ clis/dlt/dlt.go | 43 +++++++++ cmd/auth/auth.go | 22 ++++- cmd/auth/login.go | 12 ++- cmd/bundle/bundle.go | 56 ++++++++--- cmd/bundle/deploy.go | 30 +++++- cmd/bundle/deployment/bind.go | 5 +- cmd/bundle/deployment/deployment.go | 14 +-- cmd/bundle/deployment/unbind.go | 5 +- cmd/bundle/destroy.go | 2 +- cmd/bundle/dryrun.go | 48 ++++++++++ cmd/bundle/generate.go | 9 +- cmd/bundle/init.go | 25 +++-- cmd/bundle/open.go | 13 ++- cmd/bundle/run.go | 95 +++++++++++++++++-- cmd/bundle/schema.go | 9 +- cmd/bundle/show.go | 24 +++++ cmd/bundle/summary.go | 13 ++- cmd/bundle/sync.go | 10 +- cmd/bundle/test.go | 8 +- cmd/bundle/validate.go | 18 +++- cmd/bundle/variables.go | 5 +- cmd/cmd.go | 3 +- cmd/root/bundle.go | 6 +- cmd/root/bundle_test.go | 6 +- cmd/root/io.go | 2 +- cmd/root/root.go | 4 +- integration/bundle/helpers_test.go | 3 +- libs/template/resolver.go | 5 +- libs/template/resolver_test.go | 13 +-- 
libs/template/template.go | 30 +++++- libs/template/template_test.go | 3 +- .../databricks_template_schema.json | 30 ++++++ .../template/{{.project_name}}/.gitignore | 6 ++ .../.vscode/__builtins__.pyi | 3 + .../{{.project_name}}/.vscode/extensions.json | 7 ++ .../.vscode/settings.json.tmpl | 22 +++++ .../template/{{.project_name}}/README.md.tmpl | 32 +++++++ .../data_sources/sample_taxis_raw.py.tmpl | 16 ++++ .../{{.project_name}}/databricks.yml.tmpl | 25 +++++ .../explorations/exploration.ipynb.tmpl | 56 +++++++++++ .../template/{{.project_name}}/job.yml.tmpl | 26 +++++ .../{{.project_name}}/pipeline.yml.tmpl | 18 ++++ .../transformations/sample_taxis.py.tmpl | 23 +++++ 67 files changed, 930 insertions(+), 156 deletions(-) create mode 100644 bundle/permissions/set_owner_permissions.go create mode 100644 clis/cli_types.go create mode 100644 clis/dab/dab.go create mode 100644 clis/dlt/dlt.go create mode 100644 cmd/bundle/dryrun.go create mode 100644 cmd/bundle/show.go create mode 100644 libs/template/templates/default-dlt/databricks_template_schema.json create mode 100644 libs/template/templates/default-dlt/template/{{.project_name}}/.gitignore create mode 100644 libs/template/templates/default-dlt/template/{{.project_name}}/.vscode/__builtins__.pyi create mode 100644 libs/template/templates/default-dlt/template/{{.project_name}}/.vscode/extensions.json create mode 100644 libs/template/templates/default-dlt/template/{{.project_name}}/.vscode/settings.json.tmpl create mode 100644 libs/template/templates/default-dlt/template/{{.project_name}}/README.md.tmpl create mode 100644 libs/template/templates/default-dlt/template/{{.project_name}}/data_sources/sample_taxis_raw.py.tmpl create mode 100644 libs/template/templates/default-dlt/template/{{.project_name}}/databricks.yml.tmpl create mode 100644 libs/template/templates/default-dlt/template/{{.project_name}}/explorations/exploration.ipynb.tmpl create mode 100644 
libs/template/templates/default-dlt/template/{{.project_name}}/job.yml.tmpl create mode 100644 libs/template/templates/default-dlt/template/{{.project_name}}/pipeline.yml.tmpl create mode 100644 libs/template/templates/default-dlt/template/{{.project_name}}/transformations/sample_taxis.py.tmpl diff --git a/.gitignore b/.gitignore index 2f6d0ad8e..e36619413 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,7 @@ *.so *.dylib cli +dlt # Test binary, built with `go test -c` *.test @@ -28,5 +29,6 @@ __pycache__ .idea .vscode/launch.json .vscode/tasks.json +.databricks/ .ruff_cache diff --git a/Makefile b/Makefile index a66fa646d..90bea6e11 100644 --- a/Makefile +++ b/Makefile @@ -43,6 +43,12 @@ acc-showcover: build: vendor go build -mod vendor +dlt: + go build ./clis/dlt + +dab: + go build ./clis/dab + snapshot: go build -o .databricks/databricks @@ -63,4 +69,4 @@ integration: vendor integration-short: vendor VERBOSE_TEST=1 $(INTEGRATION) -short -.PHONY: lint tidy lintcheck fmt test cover showcover build snapshot vendor schema integration integration-short acc-cover acc-showcover docs +.PHONY: lint tidy lintcheck fmt test cover showcover build snapshot vendor schema integration integration-short acc-cover acc-showcover docs dlt dab diff --git a/bundle/config/mutator/default_workspace_root.go b/bundle/config/mutator/default_workspace_root.go index d7c24a5b5..b5a4ad503 100644 --- a/bundle/config/mutator/default_workspace_root.go +++ b/bundle/config/mutator/default_workspace_root.go @@ -24,6 +24,15 @@ func (m *defineDefaultWorkspaceRoot) Apply(ctx context.Context, b *bundle.Bundle return nil } + // FIXME: this shouldn't appear here + if b.Config.Project.Name != "" { + if b.Config.Bundle.Name != "" { + return diag.Errorf("project and bundle cannot both be set") + } + // TODO: properly copy all values from project to bundle + b.Config.Bundle.Name = b.Config.Project.Name + } + if b.Config.Bundle.Name == "" { return diag.Errorf("unable to define default workspace root: 
bundle name not defined") } @@ -32,8 +41,12 @@ func (m *defineDefaultWorkspaceRoot) Apply(ctx context.Context, b *bundle.Bundle return diag.Errorf("unable to define default workspace root: bundle target not selected") } + prefix := "~/" + if b.Config.Owner != "" { + prefix = "/Workspace/Users/" + b.Config.Owner + } b.Config.Workspace.RootPath = fmt.Sprintf( - "~/.bundle/%s/%s", + prefix+"/.bundle/%s/%s", b.Config.Bundle.Name, b.Config.Bundle.Target, ) diff --git a/bundle/config/mutator/process_target_mode.go b/bundle/config/mutator/process_target_mode.go index 8ee59d809..81039f100 100644 --- a/bundle/config/mutator/process_target_mode.go +++ b/bundle/config/mutator/process_target_mode.go @@ -151,7 +151,7 @@ func validateProductionMode(b *bundle.Bundle, isPrincipalUsed bool) diag.Diagnos " root_path: /Workspace/Users/%s/.bundle/${bundle.name}/${bundle.target}", b.Config.Workspace.CurrentUser.UserName, ) - if !isExplicitRootSet(b) { + if !isExplicitRootSet(b) && b.Config.Owner == "" { if isRunAsSet(r) || isPrincipalUsed { // Just setting run_as is not enough to guarantee a single deployment, // and neither is setting a principal. diff --git a/bundle/config/mutator/select_default_target.go b/bundle/config/mutator/select_default_target.go index 4ac0aae6f..08eb586d8 100644 --- a/bundle/config/mutator/select_default_target.go +++ b/bundle/config/mutator/select_default_target.go @@ -5,6 +5,7 @@ import ( "strings" "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/libs/diag" "golang.org/x/exp/maps" ) @@ -44,6 +45,18 @@ func (m *selectDefaultTarget) Apply(ctx context.Context, b *bundle.Bundle) diag. return diag.Errorf("multiple targets are marked as default (%s)", strings.Join(defaults, ", ")) } + // Still no default? Then use development mode as a fallback. 
+ // We support this as an optional fallback because it's a common + // pattern to have a single development environment, and it + // helps make databricks.yml even more concise. + if len(defaults) == 0 { + for name, env := range b.Config.Targets { + if env != nil && env.Mode == config.Development { + defaults = append(defaults, name) + } + } + } + // If no target has the `default` flag set, ask the user to specify one. if len(defaults) == 0 { return diag.Errorf("please specify target") diff --git a/bundle/config/root.go b/bundle/config/root.go index d44e25fa2..586eb9e00 100644 --- a/bundle/config/root.go +++ b/bundle/config/root.go @@ -29,6 +29,9 @@ type Root struct { // version of the spec (TODO), default cluster, default warehouse, etc. Bundle Bundle `json:"bundle,omitempty"` + // Project is an alias for bundle. + Project Bundle `json:"project,omitempty"` + // Include specifies a list of patterns of file names to load and // merge into the this configuration. Only includes defined in the root // `databricks.yml` are processed. Defaults to an empty list. @@ -57,6 +60,16 @@ type Root struct { // Sync section specifies options for files synchronization Sync Sync `json:"sync,omitempty"` + // DeployOnRun determines if the bundle should be deployed before running. + // This is useful for development workflows where you want to deploy + // changes before running a job or pipeline. + DeployOnRun bool `json:"deploy_on_run,omitempty"` + + // The owner of this deployment. This property is used to set the permissions + // for the deployment and to determine the default deployment path + // when 'mode: production' is used. + Owner string `json:"owner,omitempty"` + // RunAs section allows to define an execution identity for jobs and pipelines runs RunAs *jobs.JobRunAs `json:"run_as,omitempty"` @@ -298,8 +311,8 @@ func (r *Root) MergeTargetOverrides(name string) error { return err } - // Confirm validity of variable overrides. 
- err = validateVariableOverrides(root, target) + // Ensure validity of variable overrides. + root, err = ensureValidVariables(root, target) if err != nil { return err } @@ -313,6 +326,8 @@ func (r *Root) MergeTargetOverrides(name string) error { "sync", "permissions", "presets", + "deploy_on_run", + "owner", } { if root, err = mergeField(root, target, f); err != nil { return err @@ -328,12 +343,18 @@ func (r *Root) MergeTargetOverrides(name string) error { if vDefault.Kind() != dyn.KindInvalid { defaultPath := varPath.Append(dyn.Key("default")) root, err = dyn.SetByPath(root, defaultPath, vDefault) + if err != nil { + return root, err + } } vLookup := variable.Get("lookup") if vLookup.Kind() != dyn.KindInvalid { lookupPath := varPath.Append(dyn.Key("lookup")) root, err = dyn.SetByPath(root, lookupPath, vLookup) + if err != nil { + return root, err + } } return root, err @@ -501,36 +522,53 @@ func rewriteShorthands(v dyn.Value) (dyn.Value, error) { })) } -// validateVariableOverrides checks that all variables specified +// ensureValidVariables makes sure that all variables specified // in the target override are also defined in the root. -func validateVariableOverrides(root, target dyn.Value) (err error) { +func ensureValidVariables(root, target dyn.Value) (dyn.Value, error) { var rv map[string]variable.Variable var tv map[string]variable.Variable - // Collect variables from the root. - if v := root.Get("variables"); v.Kind() != dyn.KindInvalid { - err = convert.ToTyped(&rv, v) + // Collect variables from the target. + if v := target.Get("variables"); v.Kind() != dyn.KindInvalid { + err := convert.ToTyped(&tv, v) if err != nil { - return fmt.Errorf("unable to collect variables from root: %w", err) + return root, fmt.Errorf("unable to collect variables from target: %w", err) } } - // Collect variables from the target. 
- if v := target.Get("variables"); v.Kind() != dyn.KindInvalid { - err = convert.ToTyped(&tv, v) - if err != nil { - return fmt.Errorf("unable to collect variables from target: %w", err) + rootVars := root.Get("variables") + if rootVars.Kind() == dyn.KindInvalid { + // No root variables are declared. We treat these as optional: + // it's okay not to specify the for brevity, but then we cannot + // check the target variables for consistency. + // _, err = dyn.Set(root, "variables", dyn.V(map[string]dyn.Value{})) + targetVars := map[string]dyn.Value{} + for k := range tv { + targetVars[k] = dyn.V(map[string]dyn.Value{ + "default": dyn.V(tv[k].Default), + }) } + root, err := dyn.Set(root, "variables", dyn.V(targetVars)) + if err != nil { + return root, fmt.Errorf("unable to create variables map in root: %w", err) + } + return root, nil + } + + // Collect variables from the root. + err := convert.ToTyped(&rv, rootVars) + if err != nil { + return root, fmt.Errorf("unable to collect variables from root: %w", err) } // Check that all variables in the target exist in the root. for k := range tv { if _, ok := rv[k]; !ok { - return fmt.Errorf("variable %s is not defined but is assigned a value", k) + return root, fmt.Errorf("variable %s is not defined but is assigned a value", k) } } - return nil + return root, nil } // Best effort to get the location of configuration value at the specified path. diff --git a/bundle/config/target.go b/bundle/config/target.go index fae9c940b..32ffe75ea 100644 --- a/bundle/config/target.go +++ b/bundle/config/target.go @@ -68,6 +68,16 @@ type Target struct { Sync *Sync `json:"sync,omitempty"` + // DeployOnRun determines if the bundle should be deployed before running. + // This is useful for development workflows where you want to deploy + // changes before running a job or pipeline. + DeployOnRun bool `json:"deploy_on_run,omitempty"` + + // The owner of this deployment. 
This property is used to set the permissions + // for the deployment and to determine the default deployment path + // when 'mode: production' is used. + Owner string `json:"owner,omitempty"` + Permissions []resources.Permission `json:"permissions,omitempty"` } diff --git a/bundle/deploy/files/upload.go b/bundle/deploy/files/upload.go index bb46c97c9..690d6a9a8 100644 --- a/bundle/deploy/files/upload.go +++ b/bundle/deploy/files/upload.go @@ -9,6 +9,7 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/permissions" + "github.com/databricks/cli/clis" "github.com/databricks/cli/libs/cmdio" "github.com/databricks/cli/libs/diag" "github.com/databricks/cli/libs/log" @@ -17,6 +18,7 @@ import ( type upload struct { outputHandler sync.OutputHandler + cliType clis.CLIType } func (m *upload) Name() string { @@ -29,7 +31,9 @@ func (m *upload) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { return nil } - cmdio.LogString(ctx, fmt.Sprintf("Uploading bundle files to %s...", b.Config.Workspace.FilePath)) + if m.cliType != clis.DLT { + cmdio.LogString(ctx, fmt.Sprintf("Uploading files to %s...", b.Config.Workspace.FilePath)) + } opts, err := GetSyncOptions(ctx, b) if err != nil { return diag.FromErr(err) @@ -54,6 +58,6 @@ func (m *upload) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { return nil } -func Upload(outputHandler sync.OutputHandler) bundle.Mutator { - return &upload{outputHandler} +func Upload(outputHandler sync.OutputHandler, cliType clis.CLIType) bundle.Mutator { + return &upload{outputHandler, cliType} } diff --git a/bundle/deploy/state.go b/bundle/deploy/state.go index 6e285034a..db762141b 100644 --- a/bundle/deploy/state.go +++ b/bundle/deploy/state.go @@ -177,7 +177,7 @@ func loadState(r io.Reader) (*DeploymentState, error) { return &s, nil } -func getPathToStateFile(ctx context.Context, b *bundle.Bundle) (string, error) { +func GetPathToStateFile(ctx 
context.Context, b *bundle.Bundle) (string, error) { cacheDir, err := b.CacheDir(ctx) if err != nil { return "", fmt.Errorf("cannot get bundle cache directory: %w", err) diff --git a/bundle/deploy/state_pull.go b/bundle/deploy/state_pull.go index 844dcb77e..2bfd73457 100644 --- a/bundle/deploy/state_pull.go +++ b/bundle/deploy/state_pull.go @@ -39,7 +39,7 @@ func (s *statePull) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostic return nil } - statePath, err := getPathToStateFile(ctx, b) + statePath, err := GetPathToStateFile(ctx, b) if err != nil { return diag.FromErr(err) } diff --git a/bundle/deploy/state_pull_test.go b/bundle/deploy/state_pull_test.go index b3d838fce..0938f2d7d 100644 --- a/bundle/deploy/state_pull_test.go +++ b/bundle/deploy/state_pull_test.go @@ -103,7 +103,7 @@ func testStatePull(t *testing.T, opts statePullOpts) { } if opts.localState != nil { - statePath, err := getPathToStateFile(ctx, b) + statePath, err := GetPathToStateFile(ctx, b) require.NoError(t, err) data, err := json.Marshal(opts.localState) @@ -117,7 +117,7 @@ func testStatePull(t *testing.T, opts statePullOpts) { require.NoError(t, diags.Error()) // Check that deployment state was written - statePath, err := getPathToStateFile(ctx, b) + statePath, err := GetPathToStateFile(ctx, b) require.NoError(t, err) data, err := os.ReadFile(statePath) @@ -274,7 +274,7 @@ func TestStatePullNoState(t *testing.T) { require.NoError(t, diags.Error()) // Check that deployment state was not written - statePath, err := getPathToStateFile(ctx, b) + statePath, err := GetPathToStateFile(ctx, b) require.NoError(t, err) _, err = os.Stat(statePath) diff --git a/bundle/deploy/state_push.go b/bundle/deploy/state_push.go index 176a907c8..7ed703b72 100644 --- a/bundle/deploy/state_push.go +++ b/bundle/deploy/state_push.go @@ -24,7 +24,7 @@ func (s *statePush) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostic return diag.FromErr(err) } - statePath, err := getPathToStateFile(ctx, b) + 
statePath, err := GetPathToStateFile(ctx, b) if err != nil { return diag.FromErr(err) } diff --git a/bundle/deploy/state_push_test.go b/bundle/deploy/state_push_test.go index 3562ec147..75886569a 100644 --- a/bundle/deploy/state_push_test.go +++ b/bundle/deploy/state_push_test.go @@ -58,7 +58,7 @@ func TestStatePush(t *testing.T) { ctx := context.Background() - statePath, err := getPathToStateFile(ctx, b) + statePath, err := GetPathToStateFile(ctx, b) require.NoError(t, err) state := DeploymentState{ diff --git a/bundle/deploy/state_update.go b/bundle/deploy/state_update.go index 5488d50ed..9269f6bdf 100644 --- a/bundle/deploy/state_update.go +++ b/bundle/deploy/state_update.go @@ -51,7 +51,7 @@ func (s *stateUpdate) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnost state.ID = uuid.New() } - statePath, err := getPathToStateFile(ctx, b) + statePath, err := GetPathToStateFile(ctx, b) if err != nil { return diag.FromErr(err) } @@ -82,7 +82,7 @@ func StateUpdate() bundle.Mutator { func load(ctx context.Context, b *bundle.Bundle) (*DeploymentState, error) { // If the file does not exist, return a new DeploymentState. - statePath, err := getPathToStateFile(ctx, b) + statePath, err := GetPathToStateFile(ctx, b) if err != nil { return nil, err } diff --git a/bundle/deploy/state_update_test.go b/bundle/deploy/state_update_test.go index 04c5579a8..00e08b8b5 100644 --- a/bundle/deploy/state_update_test.go +++ b/bundle/deploy/state_update_test.go @@ -101,7 +101,7 @@ func TestStateUpdateWithExistingState(t *testing.T) { ctx := context.Background() // Create an existing state file. 
- statePath, err := getPathToStateFile(ctx, b) + statePath, err := GetPathToStateFile(ctx, b) require.NoError(t, err) state := &DeploymentState{ diff --git a/bundle/deploy/terraform/state_push.go b/bundle/deploy/terraform/state_push.go index 6cdde1371..e6b39e61f 100644 --- a/bundle/deploy/terraform/state_push.go +++ b/bundle/deploy/terraform/state_push.go @@ -9,6 +9,7 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/deploy" + "github.com/databricks/cli/clis" "github.com/databricks/cli/libs/cmdio" "github.com/databricks/cli/libs/diag" "github.com/databricks/cli/libs/filer" @@ -17,6 +18,7 @@ import ( type statePush struct { filerFactory deploy.FilerFactory + cliType clis.CLIType } func (l *statePush) Name() string { @@ -48,7 +50,9 @@ func (l *statePush) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostic defer local.Close() // Upload state file from local cache directory to filer. - cmdio.LogString(ctx, "Updating deployment state...") + if l.cliType != clis.DLT { + cmdio.LogString(ctx, "Updating deployment state...") + } log.Infof(ctx, "Writing local state file to remote state directory") err = f.Write(ctx, TerraformStateFileName, local, filer.CreateParentDirectories, filer.OverwriteIfExists) if err != nil { @@ -58,6 +62,6 @@ func (l *statePush) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostic return nil } -func StatePush() bundle.Mutator { - return &statePush{deploy.StateFiler} +func StatePush(cliType clis.CLIType) bundle.Mutator { + return &statePush{deploy.StateFiler, cliType} } diff --git a/bundle/deploy/terraform/state_push_test.go b/bundle/deploy/terraform/state_push_test.go index e022dee1b..9f2e7fb56 100644 --- a/bundle/deploy/terraform/state_push_test.go +++ b/bundle/deploy/terraform/state_push_test.go @@ -8,6 +8,7 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/clis" mockfiler "github.com/databricks/cli/internal/mocks/libs/filer" 
"github.com/databricks/cli/libs/filer" "github.com/stretchr/testify/assert" @@ -49,6 +50,7 @@ func TestStatePush(t *testing.T) { m := &statePush{ identityFiler(mock), + clis.General, } ctx := context.Background() diff --git a/bundle/permissions/set_owner_permissions.go b/bundle/permissions/set_owner_permissions.go new file mode 100644 index 000000000..b5e21f6b2 --- /dev/null +++ b/bundle/permissions/set_owner_permissions.go @@ -0,0 +1,23 @@ +package permissions + +import ( + "context" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/libs/diag" +) + +type setOwnerPermissions struct{} + +func SetOwnerPermissions() bundle.Mutator { + return &setOwnerPermissions{} +} + +func (m *setOwnerPermissions) Name() string { + return "SetOwnerPermissions" +} + +func (m *setOwnerPermissions) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + // TODO: set CAN_MANAGE permissions based on the 'owner' property + return nil +} diff --git a/bundle/phases/bind.go b/bundle/phases/bind.go index ae54e8657..2c58fa710 100644 --- a/bundle/phases/bind.go +++ b/bundle/phases/bind.go @@ -6,11 +6,12 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/deploy/lock" "github.com/databricks/cli/bundle/deploy/terraform" + "github.com/databricks/cli/clis" "github.com/databricks/cli/libs/diag" "github.com/databricks/cli/libs/log" ) -func Bind(ctx context.Context, b *bundle.Bundle, opts *terraform.BindOptions) (diags diag.Diagnostics) { +func Bind(ctx context.Context, b *bundle.Bundle, opts *terraform.BindOptions, cliType clis.CLIType) (diags diag.Diagnostics) { log.Info(ctx, "Phase: bind") diags = bundle.Apply(ctx, b, lock.Acquire()) @@ -27,13 +28,13 @@ func Bind(ctx context.Context, b *bundle.Bundle, opts *terraform.BindOptions) (d terraform.Interpolate(), terraform.Write(), terraform.Import(opts), - terraform.StatePush(), + terraform.StatePush(cliType), )) return diags } -func Unbind(ctx context.Context, b *bundle.Bundle, resourceType, 
resourceKey string) (diags diag.Diagnostics) { +func Unbind(ctx context.Context, b *bundle.Bundle, resourceType, resourceKey string, cliType clis.CLIType) (diags diag.Diagnostics) { log.Info(ctx, "Phase: unbind") diags = bundle.Apply(ctx, b, lock.Acquire()) @@ -50,7 +51,7 @@ func Unbind(ctx context.Context, b *bundle.Bundle, resourceType, resourceKey str terraform.Interpolate(), terraform.Write(), terraform.Unbind(resourceType, resourceKey), - terraform.StatePush(), + terraform.StatePush(cliType), )) return diags diff --git a/bundle/phases/deploy.go b/bundle/phases/deploy.go index 02e0e9bd8..65e23819c 100644 --- a/bundle/phases/deploy.go +++ b/bundle/phases/deploy.go @@ -18,6 +18,7 @@ import ( "github.com/databricks/cli/bundle/permissions" "github.com/databricks/cli/bundle/scripts" "github.com/databricks/cli/bundle/trampoline" + "github.com/databricks/cli/clis" "github.com/databricks/cli/libs/cmdio" "github.com/databricks/cli/libs/diag" "github.com/databricks/cli/libs/log" @@ -126,16 +127,18 @@ is removed from the catalog, but the underlying files are not deleted:` return approved, nil } -func deployCore(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { +func deployCore(ctx context.Context, b *bundle.Bundle, cliType clis.CLIType) diag.Diagnostics { // Core mutators that CRUD resources and modify deployment state. These // mutators need informed consent if they are potentially destructive. 
- cmdio.LogString(ctx, "Deploying resources...") + if cliType != clis.DLT { + cmdio.LogString(ctx, "Deploying resources...") + } diags := bundle.Apply(ctx, b, terraform.Apply()) // following original logic, continuing with sequence below even if terraform had errors diags = diags.Extend(bundle.ApplySeq(ctx, b, - terraform.StatePush(), + terraform.StatePush(cliType), terraform.Load(), apps.InterpolateVariables(), apps.UploadConfig(), @@ -143,7 +146,7 @@ func deployCore(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { metadata.Upload(), )) - if !diags.HasError() { + if !diags.HasError() && cliType != clis.DLT { cmdio.LogString(ctx, "Deployment complete!") } @@ -151,7 +154,7 @@ func deployCore(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { } // The deploy phase deploys artifacts and resources. -func Deploy(ctx context.Context, b *bundle.Bundle, outputHandler sync.OutputHandler) (diags diag.Diagnostics) { +func Deploy(ctx context.Context, b *bundle.Bundle, outputHandler sync.OutputHandler, cliType clis.CLIType) (diags diag.Diagnostics) { log.Info(ctx, "Phase: deploy") // Core mutators that CRUD resources and modify deployment state. 
These @@ -185,7 +188,7 @@ func Deploy(ctx context.Context, b *bundle.Bundle, outputHandler sync.OutputHand libraries.CheckForSameNameLibraries(), libraries.Upload(), trampoline.TransformWheelTask(), - files.Upload(outputHandler), + files.Upload(outputHandler, cliType), deploy.StateUpdate(), deploy.StatePush(), permissions.ApplyWorkspaceRootPermissions(), @@ -206,7 +209,7 @@ func Deploy(ctx context.Context, b *bundle.Bundle, outputHandler sync.OutputHand } if haveApproval { - diags = diags.Extend(deployCore(ctx, b)) + diags = diags.Extend(deployCore(ctx, b, cliType)) } else { cmdio.LogString(ctx, "Deployment cancelled!") } diff --git a/bundle/phases/initialize.go b/bundle/phases/initialize.go index 1da5b61f4..171b96508 100644 --- a/bundle/phases/initialize.go +++ b/bundle/phases/initialize.go @@ -80,6 +80,7 @@ func Initialize(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { mutator.CaptureSchemaDependency(), // Provide permission config errors & warnings after initializing all variables + permissions.SetOwnerPermissions(), permissions.PermissionDiagnostics(), mutator.SetRunAs(), mutator.OverrideCompute(), diff --git a/bundle/run/options.go b/bundle/run/options.go index 4e50788a9..98d9dea2a 100644 --- a/bundle/run/options.go +++ b/bundle/run/options.go @@ -1,6 +1,7 @@ package run import ( + "github.com/databricks/cli/clis" "github.com/databricks/cli/libs/cmdgroup" "github.com/spf13/cobra" ) @@ -11,16 +12,22 @@ type Options struct { NoWait bool } -func (o *Options) Define(cmd *cobra.Command) { +func (o *Options) Define(cmd *cobra.Command, cliType clis.CLIType) { + if cliType == clis.DLT { + // Only show the DLT flags, and don't group them + o.Pipeline.Define(cmd.Flags()) + return + } + jobGroup := cmdgroup.NewFlagGroup("Job") o.Job.DefineJobOptions(jobGroup.FlagSet()) jobTaskGroup := cmdgroup.NewFlagGroup("Job Task") jobTaskGroup.SetDescription(`Note: please prefer use of job-level parameters (--param) over task-level parameters. 
- For more information, see https://docs.databricks.com/en/workflows/jobs/create-run-jobs.html#pass-parameters-to-a-databricks-job-task`) +For more information, see https://docs.databricks.com/en/workflows/jobs/create-run-jobs.html#pass-parameters-to-a-databricks-job-task`) o.Job.DefineTaskOptions(jobTaskGroup.FlagSet()) - pipelineGroup := cmdgroup.NewFlagGroup("Pipeline") + pipelineGroup := cmdgroup.NewFlagGroup("DLT") o.Pipeline.Define(pipelineGroup.FlagSet()) wrappedCmd := cmdgroup.NewCommandWithGroupFlag(cmd) diff --git a/clis/cli_types.go b/clis/cli_types.go new file mode 100644 index 000000000..09564fd7b --- /dev/null +++ b/clis/cli_types.go @@ -0,0 +1,15 @@ +package clis + +// CLIType represents the type of CLI being used +type CLIType int + +const ( + // General is the standard CLI with all commands + General CLIType = iota + + // DLT is the CLI focused on DLT/bundle functionality + DLT + + // DABs is the CLI focused only on bundle functionality + DAB +) diff --git a/clis/dab/dab.go b/clis/dab/dab.go new file mode 100644 index 000000000..1c3c9fa67 --- /dev/null +++ b/clis/dab/dab.go @@ -0,0 +1,43 @@ +package main + +import ( + "context" + "os" + "strings" + + "github.com/databricks/cli/clis" + "github.com/databricks/cli/cmd" + "github.com/databricks/cli/cmd/bundle" + "github.com/databricks/cli/cmd/root" + "github.com/spf13/cobra" +) + +func main() { + ctx := context.Background() + bundleCmd := bundle.New(clis.General) + + // HACK: copy functionionality from root command + rootCmd := cmd.New(ctx) + root.InitTargetFlag(bundleCmd) + bundleCmd.PersistentPreRunE = rootCmd.PersistentPreRunE + + // HACK: Replace "databricks bundle" with "dab" in all command descriptions + replaceCommandDescriptions(bundleCmd) + + err := root.Execute(ctx, bundleCmd) + if err != nil { + os.Exit(1) + } +} + +// replaceCommandDescriptions recursively replaces "databricks bundle" with "dab" in all command Long descriptions +func replaceCommandDescriptions(cmd *cobra.Command) { + if 
cmd.Long != "" { + cmd.Long = strings.ReplaceAll(cmd.Long, "databricks bundle", "dab") + } + + // Recursively process all subcommands + for _, subCmd := range cmd.Commands() { + replaceCommandDescriptions(subCmd) + } +} diff --git a/clis/dlt/dlt.go b/clis/dlt/dlt.go new file mode 100644 index 000000000..a320b424e --- /dev/null +++ b/clis/dlt/dlt.go @@ -0,0 +1,43 @@ +package main + +import ( + "context" + "os" + "strings" + + "github.com/databricks/cli/clis" + "github.com/databricks/cli/cmd" + "github.com/databricks/cli/cmd/bundle" + "github.com/databricks/cli/cmd/root" + "github.com/spf13/cobra" +) + +func main() { + ctx := context.Background() + bundleCmd := bundle.New(clis.DLT) + + // HACK: copy functionality from root command + rootCmd := cmd.New(ctx) + root.InitTargetFlag(bundleCmd) + bundleCmd.PersistentPreRunE = rootCmd.PersistentPreRunE + + // HACK: Replace "databricks bundle" with "dlt" in all command descriptions + replaceCommandDescriptions(bundleCmd) + + err := root.Execute(ctx, bundleCmd) + if err != nil { + os.Exit(1) + } +} + +// replaceCommandDescriptions recursively replaces "databricks bundle" with "dlt" in all command Long descriptions +func replaceCommandDescriptions(cmd *cobra.Command) { + if cmd.Long != "" { + cmd.Long = strings.ReplaceAll(cmd.Long, "databricks bundle", "dlt") + } + + // Recursively process all subcommands + for _, subCmd := range cmd.Commands() { + replaceCommandDescriptions(subCmd) + } +} diff --git a/cmd/auth/auth.go b/cmd/auth/auth.go index 4261e93e7..585b478d0 100644 --- a/cmd/auth/auth.go +++ b/cmd/auth/auth.go @@ -22,18 +22,30 @@ Azure: https://learn.microsoft.com/azure/databricks/dev-tools/auth GCP: https://docs.gcp.databricks.com/dev-tools/auth/index.html`, } - var perisistentAuth auth.PersistentAuth - cmd.PersistentFlags().StringVar(&perisistentAuth.Host, "host", perisistentAuth.Host, "Databricks Host") - cmd.PersistentFlags().StringVar(&perisistentAuth.AccountID, "account-id", perisistentAuth.AccountID, "Databricks 
Account ID") + var persistentAuth auth.PersistentAuth + cmd.PersistentFlags().StringVar(&persistentAuth.Host, "host", persistentAuth.Host, "Databricks Host") + cmd.PersistentFlags().StringVar(&persistentAuth.AccountID, "account-id", persistentAuth.AccountID, "Databricks Account ID") + hidden := false cmd.AddCommand(newEnvCommand()) - cmd.AddCommand(newLoginCommand(&perisistentAuth)) + cmd.AddCommand(newLoginCommand(hidden, &persistentAuth)) cmd.AddCommand(newProfilesCommand()) - cmd.AddCommand(newTokenCommand(&perisistentAuth)) + cmd.AddCommand(newTokenCommand(&persistentAuth)) cmd.AddCommand(newDescribeCommand()) return cmd } +// NewTopLevelLoginCommand creates a new login command for use in a top-level command group. +// This is useful for custom CLIs where the 'auth' command group does not exist. +func NewTopLevelLoginCommand(hidden bool) *cobra.Command { + var persistentAuth auth.PersistentAuth + cmd := newLoginCommand(hidden, &persistentAuth) + cmd.Flags().StringP("profile", "p", "", "~/.databrickscfg profile") + cmd.Flags().StringVar(&persistentAuth.Host, "host", persistentAuth.Host, "Databricks Host") + cmd.Flags().StringVar(&persistentAuth.AccountID, "account-id", persistentAuth.AccountID, "Databricks Account ID") + return cmd +} + func promptForHost(ctx context.Context) (string, error) { if !cmdio.IsInTTY(ctx) { return "", errors.New("the command is being run in a non-interactive environment, please specify a host using --host") diff --git a/cmd/auth/login.go b/cmd/auth/login.go index a6d0bf4cc..d15115a79 100644 --- a/cmd/auth/login.go +++ b/cmd/auth/login.go @@ -23,8 +23,11 @@ func promptForProfile(ctx context.Context, defaultValue string) (string, error) } prompt := cmdio.Prompt(ctx) - prompt.Label = "Databricks profile name" prompt.Default = defaultValue + if defaultValue == "" { + defaultValue = "DEFAULT" + } + prompt.Label = fmt.Sprintf("Databricks profile name [%s]", defaultValue) prompt.AllowEdit = true return prompt.Run() } @@ -34,14 +37,15 @@ 
const ( defaultTimeout = 1 * time.Hour ) -func newLoginCommand(persistentAuth *auth.PersistentAuth) *cobra.Command { +func newLoginCommand(hidden bool, persistentAuth *auth.PersistentAuth) *cobra.Command { defaultConfigPath := "~/.databrickscfg" if runtime.GOOS == "windows" { defaultConfigPath = "%USERPROFILE%\\.databrickscfg" } cmd := &cobra.Command{ - Use: "login [HOST]", - Short: "Log into a Databricks workspace or account", + Use: "login [HOST]", + Hidden: hidden, + Short: "Log into a Databricks workspace or account", Long: fmt.Sprintf(`Log into a Databricks workspace or account. This command logs you into the Databricks workspace or account and saves the authentication configuration in a profile (in %s by default). diff --git a/cmd/bundle/bundle.go b/cmd/bundle/bundle.go index fb88cd7d0..1709cb20d 100644 --- a/cmd/bundle/bundle.go +++ b/cmd/bundle/bundle.go @@ -1,32 +1,58 @@ package bundle import ( + "github.com/databricks/cli/clis" + "github.com/databricks/cli/cmd/auth" "github.com/databricks/cli/cmd/bundle/deployment" + "github.com/databricks/cli/cmd/root" "github.com/spf13/cobra" ) -func New() *cobra.Command { +func New(cliType clis.CLIType) *cobra.Command { cmd := &cobra.Command{ Use: "bundle", - Short: "Databricks Asset Bundles let you express data/AI/analytics projects as code.", - Long: "Databricks Asset Bundles let you express data/AI/analytics projects as code.\n\nOnline documentation: https://docs.databricks.com/en/dev-tools/bundles/index.html", + Short: "Manage Databricks assets as code", + Long: "Databricks Asset Bundles let you express data/AI/analytics projects as code.\n\nOnline documentation: https://docs.databricks.com/dev-tools/bundles", GroupID: "development", } - initVariableFlag(cmd) - cmd.AddCommand(newDeployCommand()) + hideForDLT := cliType == clis.DLT + showForDLT := cliType == clis.General || cliType == clis.DAB + hideForGeneralCLI := cliType == clis.General + hideAlways := true + + if cliType == clis.DLT { + cmd.Use = "dlt" + 
cmd.Short = "Use DLT to build efficient & scalable data pipelines." + cmd.Long = cmd.Short + "\n\nOnline documentation: https://docs.databricks.com/delta-live-tables" + } + + initVariableFlag(cmd, hideForDLT) + cmd.AddCommand(newDeployCommand(cliType)) cmd.AddCommand(newDestroyCommand()) cmd.AddCommand(newLaunchCommand()) - cmd.AddCommand(newRunCommand()) - cmd.AddCommand(newSchemaCommand()) - cmd.AddCommand(newSyncCommand()) - cmd.AddCommand(newTestCommand()) - cmd.AddCommand(newValidateCommand()) - cmd.AddCommand(newInitCommand()) - cmd.AddCommand(newSummaryCommand()) - cmd.AddCommand(newGenerateCommand()) + cmd.AddCommand(newRunCommand(cliType)) + cmd.AddCommand(newDryRunCommand(showForDLT)) + cmd.AddCommand(newSchemaCommand(hideForDLT)) + cmd.AddCommand(newSyncCommand(hideForDLT)) + cmd.AddCommand(newTestCommand(hideAlways)) + cmd.AddCommand(newShowCommand(hideAlways)) + validateCmd := newValidateCommand(hideForDLT, cliType) + cmd.AddCommand(validateCmd) + cmd.AddCommand(newInitCommand(cliType)) + summaryCmd := newSummaryCommand(hideForDLT, cliType) + cmd.AddCommand(summaryCmd) + cmd.AddCommand(newGenerateCommand(hideForDLT)) cmd.AddCommand(newDebugCommand()) - cmd.AddCommand(deployment.NewDeploymentCommand()) - cmd.AddCommand(newOpenCommand()) + cmd.AddCommand(deployment.NewDeploymentCommand(hideForDLT, cliType)) + cmd.AddCommand(newOpenCommand(cliType)) + cmd.AddCommand(auth.NewTopLevelLoginCommand(hideForGeneralCLI)) + + if cliType != clis.General { + // HACK: set the output flag locally for the summary and validate commands + root.InitOutputFlag(summaryCmd) + root.InitOutputFlag(validateCmd) + } + return cmd } diff --git a/cmd/bundle/deploy.go b/cmd/bundle/deploy.go index 407a14d8d..3a7ad9772 100644 --- a/cmd/bundle/deploy.go +++ b/cmd/bundle/deploy.go @@ -5,20 +5,24 @@ import ( "fmt" "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config/mutator" "github.com/databricks/cli/bundle/config/validate" 
"github.com/databricks/cli/bundle/phases" "github.com/databricks/cli/bundle/render" + "github.com/databricks/cli/clis" "github.com/databricks/cli/cmd/bundle/utils" "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/libs/cmdio" "github.com/databricks/cli/libs/diag" "github.com/databricks/cli/libs/sync" + "github.com/fatih/color" "github.com/spf13/cobra" ) -func newDeployCommand() *cobra.Command { +func newDeployCommand(cliType clis.CLIType) *cobra.Command { cmd := &cobra.Command{ Use: "deploy", - Short: "Deploy bundle", + Short: "Deploy to a workspace", Args: root.NoArgs, } @@ -35,8 +39,10 @@ func newDeployCommand() *cobra.Command { cmd.Flags().StringVarP(&clusterId, "cluster-id", "c", "", "Override cluster in the deployment with the given cluster ID.") cmd.Flags().BoolVar(&autoApprove, "auto-approve", false, "Skip interactive approvals that might be required for deployment.") cmd.Flags().MarkDeprecated("compute-id", "use --cluster-id instead") + if cliType == clis.DLT { + cmd.Flags().MarkHidden("compute-id") + } cmd.Flags().BoolVar(&verbose, "verbose", false, "Enable verbose output.") - // Verbose flag currently only affects file sync output, it's used by the vscode extension cmd.Flags().MarkHidden("verbose") cmd.RunE = func(cmd *cobra.Command, args []string) error { @@ -68,6 +74,10 @@ func newDeployCommand() *cobra.Command { sync.TextOutput(ctx, c, cmd.OutOrStdout()) } } + if cliType == clis.DLT { + // DLT CLI has very minimalistic output + cmdio.LogString(ctx, fmt.Sprintf("Deploying to target '%s'...", b.Config.Bundle.Target)) + } diags = diags.Extend(phases.Initialize(ctx, b)) @@ -80,7 +90,19 @@ func newDeployCommand() *cobra.Command { } if !diags.HasError() { - diags = diags.Extend(phases.Deploy(ctx, b, outputHandler)) + diags = diags.Extend(phases.Deploy(ctx, b, outputHandler, cliType)) + } + + if cliType == clis.DLT { + if len(b.Config.Resources.Pipelines) == 1 { + diags = diags.Extend(bundle.ApplySeq(ctx, b, mutator.InitializeURLs())) + for _, 
pipeline := range b.Config.Resources.Pipelines { + fmt.Println("Deployed to " + color.CyanString(pipeline.URL)) + break + } + } else { + fmt.Println("TIP: Use the 'dlt.run' command to see all deployed resources.") + } } } diff --git a/cmd/bundle/deployment/bind.go b/cmd/bundle/deployment/bind.go index b11984c51..f2aa79623 100644 --- a/cmd/bundle/deployment/bind.go +++ b/cmd/bundle/deployment/bind.go @@ -7,6 +7,7 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/deploy/terraform" "github.com/databricks/cli/bundle/phases" + "github.com/databricks/cli/clis" "github.com/databricks/cli/cmd/bundle/utils" "github.com/databricks/cli/cmd/root" "github.com/databricks/cli/libs/cmdio" @@ -14,7 +15,7 @@ import ( "github.com/spf13/cobra" ) -func newBindCommand() *cobra.Command { +func newBindCommand(cliType clis.CLIType) *cobra.Command { cmd := &cobra.Command{ Use: "bind KEY RESOURCE_ID", Short: "Bind bundle-defined resources to existing resources", @@ -60,7 +61,7 @@ func newBindCommand() *cobra.Command { ResourceType: resource.TerraformResourceName(), ResourceKey: args[0], ResourceId: args[1], - })) + }, cliType)) } if err := diags.Error(); err != nil { return fmt.Errorf("failed to bind the resource, err: %w", err) diff --git a/cmd/bundle/deployment/deployment.go b/cmd/bundle/deployment/deployment.go index d29a8e72b..cc24de848 100644 --- a/cmd/bundle/deployment/deployment.go +++ b/cmd/bundle/deployment/deployment.go @@ -1,17 +1,19 @@ package deployment import ( + "github.com/databricks/cli/clis" "github.com/spf13/cobra" ) -func NewDeploymentCommand() *cobra.Command { +func NewDeploymentCommand(hidden bool, cliType clis.CLIType) *cobra.Command { cmd := &cobra.Command{ - Use: "deployment", - Short: "Deployment related commands", - Long: "Deployment related commands", + Use: "deployment", + Short: "Deployment related commands", + Long: "Deployment related commands", + Hidden: hidden, } - cmd.AddCommand(newBindCommand()) - 
cmd.AddCommand(newUnbindCommand()) + cmd.AddCommand(newBindCommand(cliType)) + cmd.AddCommand(newUnbindCommand(cliType)) return cmd } diff --git a/cmd/bundle/deployment/unbind.go b/cmd/bundle/deployment/unbind.go index 3fe5fbce6..82f96a5fc 100644 --- a/cmd/bundle/deployment/unbind.go +++ b/cmd/bundle/deployment/unbind.go @@ -5,13 +5,14 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/phases" + "github.com/databricks/cli/clis" "github.com/databricks/cli/cmd/bundle/utils" "github.com/databricks/cli/cmd/root" "github.com/databricks/cli/libs/diag" "github.com/spf13/cobra" ) -func newUnbindCommand() *cobra.Command { +func newUnbindCommand(cliType clis.CLIType) *cobra.Command { cmd := &cobra.Command{ Use: "unbind KEY", Short: "Unbind bundle-defined resources from its managed remote resource", @@ -40,7 +41,7 @@ func newUnbindCommand() *cobra.Command { diags = phases.Initialize(ctx, b) if !diags.HasError() { - diags = diags.Extend(phases.Unbind(ctx, b, resource.TerraformResourceName(), args[0])) + diags = diags.Extend(phases.Unbind(ctx, b, resource.TerraformResourceName(), args[0], cliType)) } if err := diags.Error(); err != nil { return err diff --git a/cmd/bundle/destroy.go b/cmd/bundle/destroy.go index 82580f994..ee5c3e4ff 100644 --- a/cmd/bundle/destroy.go +++ b/cmd/bundle/destroy.go @@ -20,7 +20,7 @@ import ( func newDestroyCommand() *cobra.Command { cmd := &cobra.Command{ Use: "destroy", - Short: "Destroy deployed bundle resources", + Short: "Destroy deployed resources", Args: root.NoArgs, } diff --git a/cmd/bundle/dryrun.go b/cmd/bundle/dryrun.go new file mode 100644 index 000000000..1858be163 --- /dev/null +++ b/cmd/bundle/dryrun.go @@ -0,0 +1,48 @@ +package bundle + +import ( + "github.com/databricks/cli/bundle/run" + "github.com/databricks/cli/clis" + "github.com/databricks/cli/libs/cmdio" + "github.com/spf13/cobra" +) + +func newDryRunCommand(hidden bool) *cobra.Command { + cmd := &cobra.Command{ + Use: "dry-run [flags] KEY", + 
Short: "Start a dry run", + Long: `Start a dry run of the DLT pipeline identified by KEY. +This command is a short-hand for 'databricks bundle run --validate-only KEY + +The KEY is the unique identifier of the resource to run, for example: + + databricks bundle dry-run my_dlt +`, + Hidden: hidden, + } + runCmd := newRunCommand(clis.DLT) + + var pipelineOpts run.PipelineOptions + pipelineOpts.Define(cmd.Flags()) + + // Reuse the run command's implementation but with our pipeline options + cmd.RunE = func(cmd *cobra.Command, args []string) error { + ctx := cmd.Context() + + err := runCmd.Flags().Set("validate-only", "true") + if err != nil { + return err + } + + err = runCmd.RunE(cmd, nil) + if err != nil { + return err + } + + cmdio.LogString(ctx, "✅ dry run successful, no problems found!") + return nil + } + cmd.ValidArgsFunction = runCmd.ValidArgsFunction + + return cmd +} diff --git a/cmd/bundle/generate.go b/cmd/bundle/generate.go index d09c6feb4..b73803f98 100644 --- a/cmd/bundle/generate.go +++ b/cmd/bundle/generate.go @@ -5,13 +5,14 @@ import ( "github.com/spf13/cobra" ) -func newGenerateCommand() *cobra.Command { +func newGenerateCommand(hidden bool) *cobra.Command { var key string cmd := &cobra.Command{ - Use: "generate", - Short: "Generate bundle configuration", - Long: "Generate bundle configuration", + Use: "generate", + Short: "Generate bundle configuration", + Long: "Generate bundle configuration", + Hidden: hidden, } cmd.AddCommand(generate.NewGenerateJobCommand()) diff --git a/cmd/bundle/init.go b/cmd/bundle/init.go index 1911abe19..98e3c5569 100644 --- a/cmd/bundle/init.go +++ b/cmd/bundle/init.go @@ -4,26 +4,31 @@ import ( "errors" "fmt" + "github.com/databricks/cli/clis" "github.com/databricks/cli/cmd/root" "github.com/databricks/cli/libs/cmdio" "github.com/databricks/cli/libs/template" "github.com/spf13/cobra" ) -func newInitCommand() *cobra.Command { +func newInitCommand(cliType clis.CLIType) *cobra.Command { cmd := &cobra.Command{ Use: "init 
[TEMPLATE_PATH]", Short: "Initialize using a bundle template", Args: root.MaximumNArgs(1), - Long: fmt.Sprintf(`Initialize using a bundle template. - -TEMPLATE_PATH optionally specifies which template to use. It can be one of the following: -%s -- a local file system path with a template directory -- a Git repository URL, e.g. https://github.com/my/repository - -See https://docs.databricks.com/en/dev-tools/bundles/templates.html for more information on templates.`, template.HelpDescriptions()), } + if cliType == clis.DLT { + cmd.Short = "Initialize a new DLT project" + cmd.Long = "Initialize a new DLT project" + } + cmd.Long = fmt.Sprintf(cmd.Short+` + + TEMPLATE_PATH optionally specifies which template to use. It can be one of the following: + %s + - a local file system path with a template directory + - a Git repository URL, e.g. https://github.com/my/repository + + See https://docs.databricks.com/dev-tools/bundles/templates.html for more information on templates.`, template.HelpDescriptions()) var configFile string var outputDir string @@ -56,7 +61,7 @@ See https://docs.databricks.com/en/dev-tools/bundles/templates.html for more inf } ctx := cmd.Context() - tmpl, err := r.Resolve(ctx) + tmpl, err := r.Resolve(ctx, cliType) if errors.Is(err, template.ErrCustomSelected) { cmdio.LogString(ctx, "Please specify a path or Git repository to use a custom template.") cmdio.LogString(ctx, "See https://docs.databricks.com/en/dev-tools/bundles/templates.html to learn more about custom templates.") diff --git a/cmd/bundle/open.go b/cmd/bundle/open.go index 733758a8e..a6ba8cc95 100644 --- a/cmd/bundle/open.go +++ b/cmd/bundle/open.go @@ -12,6 +12,7 @@ import ( "github.com/databricks/cli/bundle/deploy/terraform" "github.com/databricks/cli/bundle/phases" "github.com/databricks/cli/bundle/resources" + "github.com/databricks/cli/clis" "github.com/databricks/cli/cmd/bundle/utils" "github.com/databricks/cli/cmd/root" "github.com/databricks/cli/libs/cmdio" @@ -37,7 +38,13 @@ func 
promptOpenArgument(ctx context.Context, b *bundle.Bundle) (string, error) { return key, nil } -func resolveOpenArgument(ctx context.Context, b *bundle.Bundle, args []string) (string, error) { +func resolveOpenArgument(ctx context.Context, b *bundle.Bundle, args []string, cliType clis.CLIType) (string, error) { + // DLT CLI: if there is a single pipeline, just run it without prompting. + runnableResources := computeRunnableResourcesMap(b, cliType) + if len(args) == 0 && cliType == clis.DLT && len(runnableResources) == 1 { + return maps.Values(runnableResources)[0], nil + } + // If no arguments are specified, prompt the user to select the resource to open. if len(args) == 0 && cmdio.IsPromptSupported(ctx) { return promptOpenArgument(ctx, b) @@ -50,7 +57,7 @@ func resolveOpenArgument(ctx context.Context, b *bundle.Bundle, args []string) ( return args[0], nil } -func newOpenCommand() *cobra.Command { +func newOpenCommand(cliType clis.CLIType) *cobra.Command { cmd := &cobra.Command{ Use: "open", Short: "Open a resource in the browser", @@ -72,7 +79,7 @@ func newOpenCommand() *cobra.Command { return err } - arg, err := resolveOpenArgument(ctx, b, args) + arg, err := resolveOpenArgument(ctx, b, args, cliType) if err != nil { return err } diff --git a/cmd/bundle/run.go b/cmd/bundle/run.go index 574ad1016..7c6879ccf 100644 --- a/cmd/bundle/run.go +++ b/cmd/bundle/run.go @@ -5,13 +5,17 @@ import ( "encoding/json" "errors" "fmt" + "os" "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/deploy" + "github.com/databricks/cli/bundle/deploy/files" "github.com/databricks/cli/bundle/deploy/terraform" "github.com/databricks/cli/bundle/phases" "github.com/databricks/cli/bundle/resources" "github.com/databricks/cli/bundle/run" "github.com/databricks/cli/bundle/run/output" + "github.com/databricks/cli/clis" "github.com/databricks/cli/cmd/bundle/utils" "github.com/databricks/cli/cmd/root" "github.com/databricks/cli/libs/cmdio" @@ -20,15 +24,21 @@ import ( 
"golang.org/x/exp/maps" ) -func promptRunArgument(ctx context.Context, b *bundle.Bundle) (string, error) { +func computeRunnableResourcesMap(b *bundle.Bundle, cliType clis.CLIType) map[string]string { // Compute map of "Human readable name of resource" -> "resource key". inv := make(map[string]string) for k, ref := range resources.Completions(b, run.IsRunnable) { + if cliType == clis.DLT && ref.Description.SingularTitle != "Pipeline" { + continue + } title := fmt.Sprintf("%s: %s", ref.Description.SingularTitle, ref.Resource.GetName()) inv[title] = k } + return inv +} - key, err := cmdio.Select(ctx, inv, "Resource to run") +func promptRunArgument(ctx context.Context, b *bundle.Bundle, cliType clis.CLIType, runnable map[string]string) (string, error) { + key, err := cmdio.Select(ctx, runnable, "Resource to run") if err != nil { return "", err } @@ -38,10 +48,16 @@ func promptRunArgument(ctx context.Context, b *bundle.Bundle) (string, error) { // resolveRunArgument resolves the resource key to run. // It returns the remaining arguments to pass to the runner, if applicable. -func resolveRunArgument(ctx context.Context, b *bundle.Bundle, args []string) (string, []string, error) { +func resolveRunArgument(ctx context.Context, b *bundle.Bundle, args []string, cliType clis.CLIType) (string, []string, error) { + // DLT CLI: if there is a single pipeline, just run it without prompting. + runnableResources := computeRunnableResourcesMap(b, cliType) + if len(args) == 0 && cliType == clis.DLT && len(runnableResources) == 1 { + return maps.Values(runnableResources)[0], args, nil + } + // If no arguments are specified, prompt the user to select something to run. 
if len(args) == 0 && cmdio.IsPromptSupported(ctx) { - key, err := promptRunArgument(ctx, b) + key, err := promptRunArgument(ctx, b, cliType, runnableResources) if err != nil { return "", nil, err } @@ -71,7 +87,7 @@ func keyToRunner(b *bundle.Bundle, arg string) (run.Runner, error) { return runner, nil } -func newRunCommand() *cobra.Command { +func newRunCommand(cliType clis.CLIType) *cobra.Command { cmd := &cobra.Command{ Use: "run [flags] KEY", Short: "Run a job or pipeline update", @@ -95,9 +111,15 @@ If the specified job does not use job parameters and the job has a Python file task or a Python wheel task, the second example applies. `, } + if cliType == clis.DLT { + cmd.Short = "Run a DLT update" + cmd.Long = `Run the DLT identified by KEY. + +Example: dlt run my_dlt` + } var runOptions run.Options - runOptions.Define(cmd) + runOptions.Define(cmd, cliType) var noWait bool var restart bool @@ -116,7 +138,7 @@ task or a Python wheel task, the second example applies. return err } - key, args, err := resolveRunArgument(ctx, b, args) + key, args, err := resolveRunArgument(ctx, b, args, cliType) if err != nil { return err } @@ -142,6 +164,13 @@ task or a Python wheel task, the second example applies. return err } + if b.Config.DeployOnRun { + err = deployOnRun(ctx, b, cliType) + if err != nil { + return err + } + } + runOptions.NoWait = noWait var output output.RunOutput if restart { @@ -209,3 +238,55 @@ task or a Python wheel task, the second example applies. 
return cmd } + +func deployOnRun(ctx context.Context, b *bundle.Bundle, cliType clis.CLIType) error { + changesDetected, err := detectChanges(ctx, b) + if err != nil { + return err + } + + if changesDetected { + cmdio.LogString(ctx, fmt.Sprintf("Deploying to target '%s' since deploy_on_run is enabled for this project...", b.Config.Bundle.Target)) + diags := phases.Build(ctx, b) + diags = diags.Extend(phases.Deploy(ctx, b, nil, cliType)) + if diags.HasError() { + return diags.Error() + } + } else { + cmdio.LogString(ctx, fmt.Sprintf("No changes detected for target '%s', skipping deployment", b.Config.Bundle.Target)) + } + return nil +} + +// detectChanges checks if there are any changes to the files that have not been deployed yet. +// HACK: the logic here is a bit crude; we should refine it to be more accurate. +func detectChanges(ctx context.Context, b *bundle.Bundle) (bool, error) { + sync, err := files.GetSync(ctx, b) + if err != nil { + return false, err + } + + list, err := sync.GetFileList(ctx) + if err != nil { + return false, err + } + + stateFile, err := deploy.GetPathToStateFile(ctx, b) + if err != nil { + return false, err + } + info, err := os.Stat(stateFile) + if err != nil { + return false, err + } + + changesDetected := false + for _, file := range list { + if file.Modified().After(info.ModTime()) { + changesDetected = true + break + } + } + + return changesDetected, nil +} diff --git a/cmd/bundle/schema.go b/cmd/bundle/schema.go index 480618ed7..e797cedc5 100644 --- a/cmd/bundle/schema.go +++ b/cmd/bundle/schema.go @@ -6,11 +6,12 @@ import ( "github.com/spf13/cobra" ) -func newSchemaCommand() *cobra.Command { +func newSchemaCommand(hidden bool) *cobra.Command { cmd := &cobra.Command{ - Use: "schema", - Short: "Generate JSON Schema for bundle configuration", - Args: root.NoArgs, + Use: "schema", + Short: "Generate JSON Schema for bundle configuration", + Args: root.NoArgs, + Hidden: hidden, } cmd.RunE = func(cmd *cobra.Command, args []string) error { 
diff --git a/cmd/bundle/show.go b/cmd/bundle/show.go new file mode 100644 index 000000000..c6188d602 --- /dev/null +++ b/cmd/bundle/show.go @@ -0,0 +1,24 @@ +package bundle + +import ( + "errors" + + "github.com/spf13/cobra" +) + +func newShowCommand(hidden bool) *cobra.Command { + cmd := &cobra.Command{ + Use: "show", + Short: "Show a preview for a table", + Long: `Show a preview for a table.`, + + // We're not ready to expose this command until we specify its semantics. + Hidden: hidden, + } + + cmd.RunE = func(cmd *cobra.Command, args []string) error { + return errors.New("TODO") + } + + return cmd +} diff --git a/cmd/bundle/summary.go b/cmd/bundle/summary.go index a0e93b78b..e7b6de8b1 100644 --- a/cmd/bundle/summary.go +++ b/cmd/bundle/summary.go @@ -12,17 +12,22 @@ import ( "github.com/databricks/cli/bundle/deploy/terraform" "github.com/databricks/cli/bundle/phases" "github.com/databricks/cli/bundle/render" + "github.com/databricks/cli/clis" "github.com/databricks/cli/cmd/bundle/utils" "github.com/databricks/cli/cmd/root" "github.com/databricks/cli/libs/flags" "github.com/spf13/cobra" ) -func newSummaryCommand() *cobra.Command { +func newSummaryCommand(hidden bool, cliType clis.CLIType) *cobra.Command { cmd := &cobra.Command{ - Use: "summary", - Short: "Summarize resources deployed by this bundle", - Args: root.NoArgs, + Use: "summary", + Short: "Summarize resources deployed by this bundle", + Args: root.NoArgs, + Hidden: hidden, + } + if cliType == clis.DLT { + cmd.Short = "Summarize all resources that are part of this project" } var forcePull bool diff --git a/cmd/bundle/sync.go b/cmd/bundle/sync.go index 25475206d..54dde8131 100644 --- a/cmd/bundle/sync.go +++ b/cmd/bundle/sync.go @@ -50,11 +50,13 @@ func (f *syncFlags) syncOptionsFromBundle(cmd *cobra.Command, b *bundle.Bundle) return opts, nil } -func newSyncCommand() *cobra.Command { +func newSyncCommand(hiddenForDLT bool) *cobra.Command { cmd := &cobra.Command{ - Use: "sync [flags]", - Short: 
"Synchronize bundle tree to the workspace", - Args: root.NoArgs, + Use: "sync [flags]", + Short: "Synchronize source files to a workspace", + Long: "Synchronize source files to a workspace", + Hidden: hiddenForDLT, + Args: root.NoArgs, } var f syncFlags diff --git a/cmd/bundle/test.go b/cmd/bundle/test.go index 794575220..a9b4076db 100644 --- a/cmd/bundle/test.go +++ b/cmd/bundle/test.go @@ -6,14 +6,14 @@ import ( "github.com/spf13/cobra" ) -func newTestCommand() *cobra.Command { +func newTestCommand(hidden bool) *cobra.Command { cmd := &cobra.Command{ Use: "test", - Short: "run tests for the project", - Long: `This is longer description of the command`, + Short: "Run tests for this project", + Long: `Run tests for this project.`, // We're not ready to expose this command until we specify its semantics. - Hidden: true, + Hidden: hidden, } cmd.RunE = func(cmd *cobra.Command, args []string) error { diff --git a/cmd/bundle/validate.go b/cmd/bundle/validate.go index 0a902806f..92b858afc 100644 --- a/cmd/bundle/validate.go +++ b/cmd/bundle/validate.go @@ -9,8 +9,10 @@ import ( "github.com/databricks/cli/bundle/config/validate" "github.com/databricks/cli/bundle/phases" "github.com/databricks/cli/bundle/render" + "github.com/databricks/cli/clis" "github.com/databricks/cli/cmd/bundle/utils" "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/libs/diag" "github.com/databricks/cli/libs/flags" "github.com/spf13/cobra" ) @@ -26,11 +28,12 @@ func renderJsonOutput(cmd *cobra.Command, b *bundle.Bundle) error { return nil } -func newValidateCommand() *cobra.Command { +func newValidateCommand(hidden bool, cliType clis.CLIType) *cobra.Command { cmd := &cobra.Command{ - Use: "validate", - Short: "Validate configuration", - Args: root.NoArgs, + Use: "validate", + Short: "Validate configuration", + Args: root.NoArgs, + Hidden: hidden, } cmd.RunE = func(cmd *cobra.Command, args []string) error { @@ -45,6 +48,13 @@ func newValidateCommand() *cobra.Command { } } + if cliType 
== clis.DLT { + diags = diags.Extend(diag.Diagnostics{{ + Summary: "Use dry-run command to do a dry run of all DLT definitions in this project", + Severity: diag.Recommendation, + }}) + } + if !diags.HasError() { diags = diags.Extend(phases.Initialize(ctx, b)) } diff --git a/cmd/bundle/variables.go b/cmd/bundle/variables.go index f8f5167ea..78b7110be 100644 --- a/cmd/bundle/variables.go +++ b/cmd/bundle/variables.go @@ -4,6 +4,9 @@ import ( "github.com/spf13/cobra" ) -func initVariableFlag(cmd *cobra.Command) { +func initVariableFlag(cmd *cobra.Command, hidden bool) { cmd.PersistentFlags().StringSlice("var", []string{}, `set values for variables defined in bundle config. Example: --var="foo=bar"`) + if hidden { + cmd.PersistentFlags().MarkHidden("var") + } } diff --git a/cmd/cmd.go b/cmd/cmd.go index 4f5337fd3..41836c44b 100644 --- a/cmd/cmd.go +++ b/cmd/cmd.go @@ -4,6 +4,7 @@ import ( "context" "strings" + "github.com/databricks/cli/clis" "github.com/databricks/cli/cmd/account" "github.com/databricks/cli/cmd/api" "github.com/databricks/cli/cmd/auth" @@ -69,7 +70,7 @@ func New(ctx context.Context) *cobra.Command { // Add other subcommands. cli.AddCommand(api.New()) cli.AddCommand(auth.New()) - cli.AddCommand(bundle.New()) + cli.AddCommand(bundle.New(clis.General)) cli.AddCommand(configure.New()) cli.AddCommand(fs.New()) cli.AddCommand(labs.New(ctx)) diff --git a/cmd/root/bundle.go b/cmd/root/bundle.go index d86f9a673..772321b8a 100644 --- a/cmd/root/bundle.go +++ b/cmd/root/bundle.go @@ -168,16 +168,16 @@ func targetCompletion(cmd *cobra.Command, args []string, toComplete string) ([]s return maps.Keys(b.Config.Targets), cobra.ShellCompDirectiveDefault } -func initTargetFlag(cmd *cobra.Command) { +func InitTargetFlag(cmd *cobra.Command) { // To operate in the context of a bundle, all commands must take an "target" parameter. 
- cmd.PersistentFlags().StringP("target", "t", "", "bundle target to use (if applicable)") + cmd.PersistentFlags().StringP("target", "t", "", "deployment target to use (if applicable)") cmd.RegisterFlagCompletionFunc("target", targetCompletion) } // DEPRECATED flag func initEnvironmentFlag(cmd *cobra.Command) { // To operate in the context of a bundle, all commands must take an "environment" parameter. - cmd.PersistentFlags().StringP("environment", "e", "", "bundle target to use (if applicable)") + cmd.PersistentFlags().StringP("environment", "e", "", "deployment target to use (if applicable)") cmd.PersistentFlags().MarkDeprecated("environment", "use --target flag instead") cmd.RegisterFlagCompletionFunc("environment", targetCompletion) } diff --git a/cmd/root/bundle_test.go b/cmd/root/bundle_test.go index 5871b0ae9..7464f2048 100644 --- a/cmd/root/bundle_test.go +++ b/cmd/root/bundle_test.go @@ -209,7 +209,7 @@ func TestBundleConfigureProfileFlagAndEnvVariable(t *testing.T) { func TestTargetFlagFull(t *testing.T) { cmd := emptyCommand(t) - initTargetFlag(cmd) + InitTargetFlag(cmd) cmd.SetArgs([]string{"version", "--target", "development"}) ctx := context.Background() @@ -221,7 +221,7 @@ func TestTargetFlagFull(t *testing.T) { func TestTargetFlagShort(t *testing.T) { cmd := emptyCommand(t) - initTargetFlag(cmd) + InitTargetFlag(cmd) cmd.SetArgs([]string{"version", "-t", "production"}) ctx := context.Background() @@ -234,7 +234,7 @@ func TestTargetFlagShort(t *testing.T) { // TODO: remove when environment flag is fully deprecated func TestTargetEnvironmentFlag(t *testing.T) { cmd := emptyCommand(t) - initTargetFlag(cmd) + InitTargetFlag(cmd) initEnvironmentFlag(cmd) cmd.SetArgs([]string{"version", "--environment", "development"}) diff --git a/cmd/root/io.go b/cmd/root/io.go index bba989a79..cab0dc404 100644 --- a/cmd/root/io.go +++ b/cmd/root/io.go @@ -13,7 +13,7 @@ type outputFlag struct { output flags.Output } -func initOutputFlag(cmd *cobra.Command) *outputFlag { 
+func InitOutputFlag(cmd *cobra.Command) *outputFlag { f := outputFlag{ output: flags.OutputText, } diff --git a/cmd/root/root.go b/cmd/root/root.go index 04815f48b..351636593 100644 --- a/cmd/root/root.go +++ b/cmd/root/root.go @@ -39,10 +39,10 @@ func New(ctx context.Context) *cobra.Command { // Initialize flags logFlags := initLogFlags(cmd) progressLoggerFlag := initProgressLoggerFlag(cmd, logFlags) - outputFlag := initOutputFlag(cmd) + outputFlag := InitOutputFlag(cmd) initProfileFlag(cmd) initEnvironmentFlag(cmd) - initTargetFlag(cmd) + InitTargetFlag(cmd) cmd.PersistentPreRunE = func(cmd *cobra.Command, args []string) error { ctx := cmd.Context() diff --git a/integration/bundle/helpers_test.go b/integration/bundle/helpers_test.go index 7b889bdd5..02885e6f3 100644 --- a/integration/bundle/helpers_test.go +++ b/integration/bundle/helpers_test.go @@ -11,6 +11,7 @@ import ( "strings" "github.com/databricks/cli/bundle" + "github.com/databricks/cli/clis" "github.com/databricks/cli/internal/testcli" "github.com/databricks/cli/internal/testutil" "github.com/databricks/cli/libs/cmdio" @@ -45,7 +46,7 @@ func initTestTemplateWithBundleRoot(t testutil.TestingT, ctx context.Context, te OutputDir: bundleRoot, } - tmpl, err := r.Resolve(ctx) + tmpl, err := r.Resolve(ctx, clis.General) require.NoError(t, err) defer tmpl.Reader.Cleanup(ctx) diff --git a/libs/template/resolver.go b/libs/template/resolver.go index 2cc8bf1c7..cd91db0ac 100644 --- a/libs/template/resolver.go +++ b/libs/template/resolver.go @@ -5,6 +5,7 @@ import ( "errors" "strings" + "github.com/databricks/cli/clis" "github.com/databricks/cli/libs/git" ) @@ -56,7 +57,7 @@ var ErrCustomSelected = errors.New("custom template selected") // Configures the reader and the writer for template and returns // a handle to the template. // Prompts the user if needed. 
-func (r Resolver) Resolve(ctx context.Context) (*Template, error) { +func (r Resolver) Resolve(ctx context.Context, cliType clis.CLIType) (*Template, error) { if r.Tag != "" && r.Branch != "" { return nil, errors.New("only one of tag or branch can be specified") } @@ -73,7 +74,7 @@ func (r Resolver) Resolve(ctx context.Context) (*Template, error) { if r.TemplatePathOrUrl == "" { // Prompt the user to select a template // if a template path or URL is not provided. - templateName, err = SelectTemplate(ctx) + templateName, err = SelectTemplate(ctx, cliType) if err != nil { return nil, err } diff --git a/libs/template/resolver_test.go b/libs/template/resolver_test.go index 1dee1c45f..ef2e95b91 100644 --- a/libs/template/resolver_test.go +++ b/libs/template/resolver_test.go @@ -4,6 +4,7 @@ import ( "context" "testing" + "github.com/databricks/cli/clis" "github.com/databricks/cli/libs/cmdio" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -15,7 +16,7 @@ func TestTemplateResolverBothTagAndBranch(t *testing.T) { Branch: "branch", } - _, err := r.Resolve(context.Background()) + _, err := r.Resolve(context.Background(), clis.General) assert.EqualError(t, err, "only one of tag or branch can be specified") } @@ -23,7 +24,7 @@ func TestTemplateResolverErrorsWhenPromptingIsNotSupported(t *testing.T) { r := Resolver{} ctx := cmdio.MockDiscard(context.Background()) - _, err := r.Resolve(ctx) + _, err := r.Resolve(ctx, clis.General) assert.EqualError(t, err, "prompting is not supported. 
Please specify the path, name or URL of the template to use") } @@ -38,7 +39,7 @@ func TestTemplateResolverForDefaultTemplates(t *testing.T) { TemplatePathOrUrl: name, } - tmpl, err := r.Resolve(context.Background()) + tmpl, err := r.Resolve(context.Background(), clis.General) require.NoError(t, err) assert.Equal(t, &builtinReader{name: name}, tmpl.Reader) @@ -52,7 +53,7 @@ func TestTemplateResolverForDefaultTemplates(t *testing.T) { ConfigFile: "/config/file", } - tmpl, err := r.Resolve(context.Background()) + tmpl, err := r.Resolve(context.Background(), clis.General) require.NoError(t, err) // Assert reader and writer configuration @@ -69,7 +70,7 @@ func TestTemplateResolverForCustomUrl(t *testing.T) { ConfigFile: "/config/file", } - tmpl, err := r.Resolve(context.Background()) + tmpl, err := r.Resolve(context.Background(), clis.General) require.NoError(t, err) assert.Equal(t, Custom, tmpl.name) @@ -89,7 +90,7 @@ func TestTemplateResolverForCustomPath(t *testing.T) { ConfigFile: "/config/file", } - tmpl, err := r.Resolve(context.Background()) + tmpl, err := r.Resolve(context.Background(), clis.General) require.NoError(t, err) assert.Equal(t, Custom, tmpl.name) diff --git a/libs/template/template.go b/libs/template/template.go index 44834436b..652a3686b 100644 --- a/libs/template/template.go +++ b/libs/template/template.go @@ -7,6 +7,7 @@ import ( "slices" "strings" + "github.com/databricks/cli/clis" "github.com/databricks/cli/libs/cmdio" "github.com/databricks/cli/libs/git" ) @@ -26,6 +27,7 @@ type TemplateName string const ( DefaultPython TemplateName = "default-python" DefaultSql TemplateName = "default-sql" + DefaultDLT TemplateName = "default-dlt" DbtSql TemplateName = "dbt-sql" MlopsStacks TemplateName = "mlops-stacks" DefaultPydabs TemplateName = "default-pydabs" @@ -36,10 +38,16 @@ const ( var databricksTemplates = []Template{ { name: DefaultPython, - description: 
"The default Python template for Notebooks / DLT / Workflows", Reader: &builtinReader{name: string(DefaultPython)}, Writer: &writerWithFullTelemetry{}, }, + { + name: DefaultDLT, + description: "The default DLT template", + Reader: &builtinReader{name: string(DefaultDLT)}, + Writer: &writerWithFullTelemetry{}, + }, { name: DefaultSql, description: "The default SQL template for .sql files that run with Databricks SQL", @@ -87,12 +95,20 @@ func HelpDescriptions() string { var customTemplateDescription = "Bring your own template" -func options() []cmdio.Tuple { +func options(cliType clis.CLIType) []cmdio.Tuple { names := make([]cmdio.Tuple, 0, len(databricksTemplates)) for _, template := range databricksTemplates { if template.hidden { continue } + if cliType == clis.DLT && (template.name != DefaultDLT && template.name != Custom) { + // Only show DLT templates for DLT CLI + continue + } + if cliType != clis.DLT && template.name == DefaultDLT { + // Hide experimental DLT template in General CLI + continue + } tuple := cmdio.Tuple{ Name: string(template.name), Id: template.description, @@ -107,11 +123,11 @@ func HelpDescriptions() string { return names } -func SelectTemplate(ctx context.Context) (TemplateName, error) { +func SelectTemplate(ctx context.Context, cliType clis.CLIType) (TemplateName, error) { if !cmdio.IsPromptSupported(ctx) { return "", errors.New("prompting is not supported. 
Please specify the path, name or URL of the template to use") } - description, err := cmdio.SelectOrdered(ctx, options(), "Template to use") + description, err := cmdio.SelectOrdered(ctx, options(cliType), "Template to use") if err != nil { return "", err } diff --git a/libs/template/template_test.go b/libs/template/template_test.go index 80391e58b..f32f3c6fe 100644 --- a/libs/template/template_test.go +++ b/libs/template/template_test.go @@ -3,6 +3,7 @@ package template import ( "testing" + "github.com/databricks/cli/clis" "github.com/databricks/cli/libs/cmdio" "github.com/stretchr/testify/assert" ) @@ -23,7 +24,7 @@ func TestTemplateOptions(t *testing.T) { {Name: "mlops-stacks", Id: "The Databricks MLOps Stacks template (github.com/databricks/mlops-stacks)"}, {Name: "custom...", Id: "Bring your own template"}, } - assert.Equal(t, expected, options()) + assert.Equal(t, expected, options(clis.General)) } func TestBundleInitRepoName(t *testing.T) { diff --git a/libs/template/templates/default-dlt/databricks_template_schema.json b/libs/template/templates/default-dlt/databricks_template_schema.json new file mode 100644 index 000000000..9d1aaa6be --- /dev/null +++ b/libs/template/templates/default-dlt/databricks_template_schema.json @@ -0,0 +1,30 @@ +{ + "welcome_message": "\nCreating a new DLT project.", + "properties": { + "project_name": { + "type": "string", + "default": "my_dlt_project", + "description": "\nPlease provide a unique name for this project\nproject_name", + "order": 1, + "pattern": "^[A-Za-z0-9_]+$", + "pattern_match_failure_message": "Name must consist of letters, numbers, and underscores." 
+ }, + "default_catalog": { + "type": "string", + "default": "{{default_catalog}}", + "pattern": "^\\w*$", + "pattern_match_failure_message": "Invalid catalog name.", + "description": "\nPlease provide a default catalog to use{{if eq (default_catalog) \"\"}} (leave blank when not using Unity Catalog){{end}}\ndefault_catalog", + "order": 2 + }, + "default_schema": { + "type": "string", + "default": "", + "pattern": "^\\w*$", + "pattern_match_failure_message": "Invalid schema name.", + "description": "\nPlease provide a default schema to use. Leave this blank to use the current username (recommended for collaboration).\ndefault_schema", + "order": 3 + } + }, + "success_message": "\n{{if (eq .default_schema \"\")}}Schema defaults to {{short_name}} for the current user.\n{{end}}\nWorkspace host defaults to {{workspace_host}} (edit in {{.project_name}}/databricks.yml).\n\n✨ Your new DLT project has been created in the '{{.project_name}}' directory!\n\nPlease refer to the README.md file for \"getting started\" instructions." 
+} diff --git a/libs/template/templates/default-dlt/template/{{.project_name}}/.gitignore b/libs/template/templates/default-dlt/template/{{.project_name}}/.gitignore new file mode 100644 index 000000000..fd93f8369 --- /dev/null +++ b/libs/template/templates/default-dlt/template/{{.project_name}}/.gitignore @@ -0,0 +1,6 @@ +.databricks/ +build/ +dist/ +__pycache__/ +*.egg-info +.venv/ diff --git a/libs/template/templates/default-dlt/template/{{.project_name}}/.vscode/__builtins__.pyi b/libs/template/templates/default-dlt/template/{{.project_name}}/.vscode/__builtins__.pyi new file mode 100644 index 000000000..0edd5181b --- /dev/null +++ b/libs/template/templates/default-dlt/template/{{.project_name}}/.vscode/__builtins__.pyi @@ -0,0 +1,3 @@ +# Typings for Pylance in Visual Studio Code +# see https://github.com/microsoft/pyright/blob/main/docs/builtins.md +from databricks.sdk.runtime import * diff --git a/libs/template/templates/default-dlt/template/{{.project_name}}/.vscode/extensions.json b/libs/template/templates/default-dlt/template/{{.project_name}}/.vscode/extensions.json new file mode 100644 index 000000000..5d15eba36 --- /dev/null +++ b/libs/template/templates/default-dlt/template/{{.project_name}}/.vscode/extensions.json @@ -0,0 +1,7 @@ +{ + "recommendations": [ + "databricks.databricks", + "ms-python.vscode-pylance", + "redhat.vscode-yaml" + ] +} diff --git a/libs/template/templates/default-dlt/template/{{.project_name}}/.vscode/settings.json.tmpl b/libs/template/templates/default-dlt/template/{{.project_name}}/.vscode/settings.json.tmpl new file mode 100644 index 000000000..2f753e89e --- /dev/null +++ b/libs/template/templates/default-dlt/template/{{.project_name}}/.vscode/settings.json.tmpl @@ -0,0 +1,22 @@ +{ + "python.analysis.stubPath": ".vscode", + "databricks.python.envFile": "${workspaceFolder}/.env", + "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook 
source|^(#\\s*%%|#\\s*\\|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])", + "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------", + "python.testing.pytestArgs": [ + "." + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true, + {{- /* Unfortunately extraPaths doesn't support globs!! See: https://github.com/microsoft/pylance-release/issues/973 */}} + "python.analysis.extraPaths": ["data_sources", "transformations"], + "files.exclude": { + "**/*.egg-info": true, + "**/__pycache__": true, + ".pytest_cache": true, + }, + "[python]": { + "editor.defaultFormatter": "ms-python.black-formatter", + "editor.formatOnSave": true, + }, +} diff --git a/libs/template/templates/default-dlt/template/{{.project_name}}/README.md.tmpl b/libs/template/templates/default-dlt/template/{{.project_name}}/README.md.tmpl new file mode 100644 index 000000000..68e3ae546 --- /dev/null +++ b/libs/template/templates/default-dlt/template/{{.project_name}}/README.md.tmpl @@ -0,0 +1,32 @@ +# {{.project_name}} + +The '{{.project_name}}' project was generated by using the default-dlt template. + +## Getting started + +1. Install the DLT CLI + +2. Authenticate to your Databricks workspace, if you have not done so already: + ``` + $ dlt login + ``` + +3. Edit the transformations for your DLT code under transformations/! + +4. To run your DLT code, type + ``` + $ dlt run + ``` + +5. To do a "dry run" to validate correctness of your DLT code, type + ``` + $ dlt dry-run + ``` + +6. To inspect your DLT pipeline in a browser, type + ``` + $ dlt open + ``` + +7. That's all you need to get started! Please reference the [DLT documentation](https://docs.databricks.com/delta-live-tables/) + to learn more about DLT and about how this project can be configured. 
diff --git a/libs/template/templates/default-dlt/template/{{.project_name}}/data_sources/sample_taxis_raw.py.tmpl b/libs/template/templates/default-dlt/template/{{.project_name}}/data_sources/sample_taxis_raw.py.tmpl new file mode 100644 index 000000000..ae21f7049 --- /dev/null +++ b/libs/template/templates/default-dlt/template/{{.project_name}}/data_sources/sample_taxis_raw.py.tmpl @@ -0,0 +1,16 @@ +""" +The 'data_sources' folder contains definitions for all data sources +used by the pipeline. Keeping them separate provides a clear overview +of the data used and allows for easy swapping of sources during development. +""" + +import dlt + +{{- $suffix := .project_name }} +{{- if (eq .project_name "my_dlt_project") }} + {{- $suffix = short_name }} +{{- end }} + +@dlt.view(comment="Sample raw taxi data") +def taxis_raw_{{$suffix}}(): + return spark.sql("SELECT * FROM samples.nyctaxi.trips LIMIT 10") diff --git a/libs/template/templates/default-dlt/template/{{.project_name}}/databricks.yml.tmpl b/libs/template/templates/default-dlt/template/{{.project_name}}/databricks.yml.tmpl new file mode 100644 index 000000000..3445a5605 --- /dev/null +++ b/libs/template/templates/default-dlt/template/{{.project_name}}/databricks.yml.tmpl @@ -0,0 +1,25 @@ +# This defines the {{.project_name}} project. +project: + name: {{.project_name}} + +include: + - ./*.yml + +targets: + dev: + mode: development + deploy_on_run: true + workspace: + host: {{workspace_host}} + variables: + catalog: {{.default_catalog}} + schema: {{if (eq .default_schema "")}}${workspace.current_user.short_name} # the current username, e.g. 
{{short_name}}{{else}}{{.default_schema}}{{end}} + + prod: + mode: production + owner: {{user_name}} + workspace: + host: {{workspace_host}} + variables: + catalog: {{.default_catalog}} + schema: {{if (eq .default_schema "")}}default{{else}}{{.default_schema}}{{end}} diff --git a/libs/template/templates/default-dlt/template/{{.project_name}}/explorations/exploration.ipynb.tmpl b/libs/template/templates/default-dlt/template/{{.project_name}}/explorations/exploration.ipynb.tmpl new file mode 100644 index 000000000..2d9cf04c1 --- /dev/null +++ b/libs/template/templates/default-dlt/template/{{.project_name}}/explorations/exploration.ipynb.tmpl @@ -0,0 +1,56 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "6bca260b-13d1-448f-8082-30b60a85c9ae", + "showTitle": false, + "title": "" + } + }, + "outputs": [], +{{- $suffix := .project_name }} +{{- if (eq .project_name "my_dlt_project") }} + {{- $suffix = short_name }} +{{- end }} + "source": [ + "-- The 'explorations' folder is used for ad-hoc notebooks\n", + "-- to explore the data produced by this pipeline.\n", + "\n", + "USE CATALOG `{{.default_catalog}}`;\n", + "USE SCHEMA `{{.default_schema}}`;\n", + "\n", + "SELECT * from sample_taxis_{{$suffix}};" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "ipynb-notebook", + "widgets": {} + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/libs/template/templates/default-dlt/template/{{.project_name}}/job.yml.tmpl 
b/libs/template/templates/default-dlt/template/{{.project_name}}/job.yml.tmpl new file mode 100644 index 000000000..bd7715f1d --- /dev/null +++ b/libs/template/templates/default-dlt/template/{{.project_name}}/job.yml.tmpl @@ -0,0 +1,26 @@ +{{/* skip this file for now! */}} +{{skip "job.yml"}} + +# The job that triggers the {{.project_name}} pipeline. +resources: + jobs: + {{.project_name}}_job: + name: {{.project_name}}_job + + trigger: + # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger + periodic: + interval: 1 + unit: DAYS + + {{- if not is_service_principal}} + + email_notifications: + on_failure: {{user_name}} + + {{- end}} + + tasks: + - task_key: refresh_pipeline + pipeline_task: + pipeline_id: ${resources.pipelines.{{.project_name}}.id} diff --git a/libs/template/templates/default-dlt/template/{{.project_name}}/pipeline.yml.tmpl b/libs/template/templates/default-dlt/template/{{.project_name}}/pipeline.yml.tmpl new file mode 100644 index 000000000..23f7d0976 --- /dev/null +++ b/libs/template/templates/default-dlt/template/{{.project_name}}/pipeline.yml.tmpl @@ -0,0 +1,18 @@ +# The configuration for the {{.project_name}} pipeline. 
+resources: + pipelines: + {{.project_name}}: + name: {{.project_name}} + serverless: true + {{- if or (eq .default_catalog "") (eq .default_catalog "hive_metastore")}} + ## Specify the 'catalog' field to configure this pipeline to make use of Unity Catalog: + # catalog: ${var.catalog} + {{- else}} + catalog: ${var.catalog} + {{- end}} + schema: ${var.schema} + libraries: + - file: + path: data_sources/** + - file: + path: transformations/** diff --git a/libs/template/templates/default-dlt/template/{{.project_name}}/transformations/sample_taxis.py.tmpl b/libs/template/templates/default-dlt/template/{{.project_name}}/transformations/sample_taxis.py.tmpl new file mode 100644 index 000000000..bbccea6de --- /dev/null +++ b/libs/template/templates/default-dlt/template/{{.project_name}}/transformations/sample_taxis.py.tmpl @@ -0,0 +1,23 @@ +""" +The 'transformations' folder contains all transformations +applied in this pipeline. +Documentation: https://docs.databricks.com/delta-live-tables/python-ref.html +""" + +import dlt +from pyspark.sql.functions import to_date, count +from pyspark.sql import DataFrame + +{{- $suffix := .project_name }} +{{- if (eq .project_name "my_dlt_project") }} + {{- $suffix = short_name }} +{{- end }} + +@dlt.table(comment="Sample table with taxi data") +def sample_taxis_{{$suffix}}(): + return ( + dlt.read("taxis_raw_{{$suffix}}") + .withColumn("pickup_date", to_date("tpep_pickup_datetime")) + .groupBy("pickup_date") + .agg(count("*").alias("number_of_trips")) + )