databricks-cli/cmd/bundle/generate/utils.go

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

177 lines
3.8 KiB
Go
Raw Normal View History

package generate
import (
"context"
"fmt"
"io"
"os"
"path"
"path/filepath"
"github.com/databricks/cli/libs/cmdio"
"github.com/databricks/cli/libs/notebook"
"github.com/databricks/databricks-sdk-go"
"github.com/databricks/databricks-sdk-go/service/jobs"
"github.com/databricks/databricks-sdk-go/service/pipelines"
Added support for Databricks Apps in DABs (#1928) ## Changes Now it's possible to configure new `app` resource in bundle and point it to the custom `source_code_path` location where Databricks App code is defined. On `databricks bundle deploy` DABs will create an app. All consecutive `databricks bundle deploy` execution will update an existing app if there are any updated On `databricks bundle run <my_app>` DABs will execute app deployment. If the app is not started yet, it will start the app first. ### Bundle configuration ``` bundle: name: apps variables: my_job_id: description: "ID of job to run app" lookup: job: "My Job" databricks_name: description: "Name for app user" additional_flags: description: "Additional flags to run command app" default: "" my_app_config: type: complex description: "Configuration for my Databricks App" default: command: - flask - --app - hello - run - ${var.additional_flags} env: - name: DATABRICKS_NAME value: ${var.databricks_name} resources: apps: my_app: name: "anester-app" # required and has to be unique description: "My App" source_code_path: ./app # required and points to location of app code config: ${var.my_app_config} resources: - name: "my-job" description: "A job for app to be able to run" job: id: ${var.my_job_id} permission: "CAN_MANAGE_RUN" permissions: - user_name: "foo@bar.com" level: "CAN_VIEW" - service_principal_name: "my_sp" level: "CAN_MANAGE" targets: dev: variables: databricks_name: "Andrew (from dev)" additional_flags: --debug prod: variables: databricks_name: "Andrew (from prod)" ``` ### Execution 1. `databricks bundle deploy -t dev` 2. `databricks bundle run my_app -t dev` **If app is started** ``` ✓ Getting the status of the app my-app ✓ App is in RUNNING state ✓ Preparing source code for new app deployment. 
✓ Deployment is pending ✓ Starting app with command: flask --app hello run --debug ✓ App started successfully You can access the app at <app-url> ``` **If app is not started** ``` ✓ Getting the status of the app my-app ✓ App is in UNAVAILABLE state ✓ Starting the app my-app ✓ App is starting... .... ✓ App is starting... ✓ App is started! ✓ Preparing source code for new app deployment. ✓ Downloading source code from /Workspace/Users/... ✓ Starting app with command: flask --app hello run --debug ✓ App started successfully You can access the app at <app-url> ``` ## Tests Added unit and config tests + manual test. ``` --- PASS: TestAccDeployBundleWithApp (404.59s) PASS coverage: 36.8% of statements in ./... ok github.com/databricks/cli/internal/bundle 405.035s coverage: 36.8% of statements in ./... ```
2025-01-13 16:43:48 +00:00
"github.com/databricks/databricks-sdk-go/service/workspace"
"golang.org/x/sync/errgroup"
)
// downloader collects workspace objects that should be copied to the local
// file system and writes them out when FlushToDisk is called.
type downloader struct {
// files maps the local target path of each pending download to the
// workspace path it is downloaded from.
files map[string]string
// w is the workspace client used to stat, list and download objects.
w *databricks.WorkspaceClient
// sourceDir is the local directory the downloaded files are written to.
sourceDir string
// configDir is the local directory that rewritten paths are made relative to.
configDir string
}
// MarkTaskForDownload registers the notebook referenced by a job task for
// download via markNotebookForDownload, which also rewrites the task's
// notebook path in place. Tasks without a notebook task are left untouched
// and produce a nil error.
func (n *downloader) MarkTaskForDownload(ctx context.Context, task *jobs.Task) error {
	if nb := task.NotebookTask; nb != nil {
		return n.markNotebookForDownload(ctx, &nb.NotebookPath)
	}
	return nil
}
// MarkPipelineLibraryForDownload registers the notebook or file referenced by
// a pipeline library for download. A notebook reference takes precedence over
// a file reference; a library with neither is ignored and yields a nil error.
func (n *downloader) MarkPipelineLibraryForDownload(ctx context.Context, lib *pipelines.PipelineLibrary) error {
	switch {
	case lib.Notebook != nil:
		return n.markNotebookForDownload(ctx, &lib.Notebook.Path)
	case lib.File != nil:
		return n.markFileForDownload(ctx, &lib.File.Path)
	default:
		return nil
	}
}
// markFileForDownload checks that the workspace file at *filePath exists,
// schedules it for download into sourceDir under its base name, and then
// overwrites *filePath with the local target path relative to configDir.
func (n *downloader) markFileForDownload(ctx context.Context, filePath *string) error {
	remote := *filePath
	if _, err := n.w.Workspace.GetStatusByPath(ctx, remote); err != nil {
		return err
	}

	// Workspace paths are slash-separated, hence path.Base rather than
	// filepath.Base; the local target is built with the OS-specific joiner.
	local := filepath.Join(n.sourceDir, path.Base(remote))
	n.files[local] = remote

	relative, err := filepath.Rel(n.configDir, local)
	if err != nil {
		return err
	}
	*filePath = relative
	return nil
}
Added support for Databricks Apps in DABs (#1928) ## Changes Now it's possible to configure new `app` resource in bundle and point it to the custom `source_code_path` location where Databricks App code is defined. On `databricks bundle deploy` DABs will create an app. All consecutive `databricks bundle deploy` execution will update an existing app if there are any updated On `databricks bundle run <my_app>` DABs will execute app deployment. If the app is not started yet, it will start the app first. ### Bundle configuration ``` bundle: name: apps variables: my_job_id: description: "ID of job to run app" lookup: job: "My Job" databricks_name: description: "Name for app user" additional_flags: description: "Additional flags to run command app" default: "" my_app_config: type: complex description: "Configuration for my Databricks App" default: command: - flask - --app - hello - run - ${var.additional_flags} env: - name: DATABRICKS_NAME value: ${var.databricks_name} resources: apps: my_app: name: "anester-app" # required and has to be unique description: "My App" source_code_path: ./app # required and points to location of app code config: ${var.my_app_config} resources: - name: "my-job" description: "A job for app to be able to run" job: id: ${var.my_job_id} permission: "CAN_MANAGE_RUN" permissions: - user_name: "foo@bar.com" level: "CAN_VIEW" - service_principal_name: "my_sp" level: "CAN_MANAGE" targets: dev: variables: databricks_name: "Andrew (from dev)" additional_flags: --debug prod: variables: databricks_name: "Andrew (from prod)" ``` ### Execution 1. `databricks bundle deploy -t dev` 2. `databricks bundle run my_app -t dev` **If app is started** ``` ✓ Getting the status of the app my-app ✓ App is in RUNNING state ✓ Preparing source code for new app deployment. 
✓ Deployment is pending ✓ Starting app with command: flask --app hello run --debug ✓ App started successfully You can access the app at <app-url> ``` **If app is not started** ``` ✓ Getting the status of the app my-app ✓ App is in UNAVAILABLE state ✓ Starting the app my-app ✓ App is starting... .... ✓ App is starting... ✓ App is started! ✓ Preparing source code for new app deployment. ✓ Downloading source code from /Workspace/Users/... ✓ Starting app with command: flask --app hello run --debug ✓ App started successfully You can access the app at <app-url> ``` ## Tests Added unit and config tests + manual test. ``` --- PASS: TestAccDeployBundleWithApp (404.59s) PASS coverage: 36.8% of statements in ./... ok github.com/databricks/cli/internal/bundle 405.035s coverage: 36.8% of statements in ./... ```
2025-01-13 16:43:48 +00:00
// markDirectoryForDownload checks that the workspace directory at *dirPath
// exists, schedules every non-directory object found by a recursive listing
// for download, and then overwrites *dirPath with sourceDir expressed relative
// to configDir.
func (n *downloader) markDirectoryForDownload(ctx context.Context, dirPath *string) error {
	root := *dirPath
	if _, err := n.w.Workspace.GetStatusByPath(ctx, root); err != nil {
		return err
	}

	entries, err := n.w.Workspace.RecursiveList(ctx, root)
	if err != nil {
		return err
	}

	for _, entry := range entries {
		if entry.ObjectType != workspace.ObjectTypeDirectory {
			// entry is a per-iteration copy, so the path rewrite done by
			// markFileForDownload does not touch the listing itself.
			if markErr := n.markFileForDownload(ctx, &entry.Path); markErr != nil {
				return markErr
			}
		}
	}

	relative, err := filepath.Rel(n.configDir, n.sourceDir)
	if err != nil {
		return err
	}
	*dirPath = relative
	return nil
}
// markNotebookForDownload looks up the workspace notebook at *notebookPath,
// schedules it for download into sourceDir under its base name plus the
// extension reported by notebook.GetExtensionByLanguage, and then overwrites
// *notebookPath with the local target path relative to configDir.
func (n *downloader) markNotebookForDownload(ctx context.Context, notebookPath *string) error {
	remote := *notebookPath
	status, err := n.w.Workspace.GetStatusByPath(ctx, remote)
	if err != nil {
		return err
	}

	// The local file name is the notebook's base name followed by the
	// extension derived from the status object.
	local := filepath.Join(n.sourceDir, path.Base(remote)+notebook.GetExtensionByLanguage(status))
	n.files[local] = remote

	// The caller's reference must point at the local copy, relative to the
	// config dir.
	relative, err := filepath.Rel(n.configDir, local)
	if err != nil {
		return err
	}
	*notebookPath = relative
	return nil
}
// FlushToDisk downloads every file previously marked for download and writes
// it to its local target path.
//
// sourceDir is created first if it does not exist. Before any download starts,
// all target paths are checked: a target that is an existing directory is
// always an error, and an existing file is an error unless force is true.
// Downloads then run concurrently; the first error encountered is returned and
// cancels the context passed to the remaining downloads.
func (n *downloader) FlushToDisk(ctx context.Context, force bool) error {
	err := os.MkdirAll(n.sourceDir, 0o755)
	if err != nil {
		return err
	}

	// First check that all files can be written
	for targetPath := range n.files {
		info, err := os.Stat(targetPath)
		if err == nil {
			if info.IsDir() {
				return fmt.Errorf("%s is a directory", targetPath)
			}
			if !force {
				return fmt.Errorf("%s already exists. Use --force to overwrite", targetPath)
			}
		}
	}

	errs, errCtx := errgroup.WithContext(ctx)
	for k, v := range n.files {
		// Per-iteration copies keep the closure correct on Go versions where
		// the loop variables are shared across iterations.
		targetPath := k
		filePath := v
		errs.Go(func() (err error) {
			reader, err := n.w.Workspace.Download(errCtx, filePath)
			if err != nil {
				return err
			}
			// Close the remote stream on every exit path, not only on
			// success, and surface its error when nothing else failed.
			defer func() {
				if closeErr := reader.Close(); err == nil {
					err = closeErr
				}
			}()

			file, err := os.Create(targetPath)
			if err != nil {
				return err
			}
			// A failed Close can mean the data never reached the disk, so it
			// must not be dropped silently.
			defer func() {
				if closeErr := file.Close(); err == nil {
					err = closeErr
				}
			}()

			if _, err = io.Copy(file, reader); err != nil {
				return err
			}

			cmdio.LogString(errCtx, "File successfully saved to "+targetPath)
			return nil
		})
	}

	return errs.Wait()
}
// newDownloader returns a downloader bound to the given workspace client that
// writes files into sourceDir and rewrites paths relative to configDir. The
// set of pending downloads starts out empty.
func newDownloader(w *databricks.WorkspaceClient, sourceDir, configDir string) *downloader {
	d := new(downloader)
	d.w = w
	d.sourceDir = sourceDir
	d.configDir = configDir
	d.files = map[string]string{}
	return d
}