2023-06-26 23:31:20 +00:00
|
|
|
// Package bundle is the top level package for Databricks Asset Bundles.
|
2022-12-12 11:49:25 +00:00
|
|
|
//
|
|
|
|
// A bundle is represented by the [Bundle] type. It consists of configuration
|
|
|
|
// and runtime state, such as a client to a Databricks workspace.
|
|
|
|
// Every mutation to a bundle's configuration or state is represented as a [Mutator].
|
|
|
|
// This interface makes every mutation observable and lets us reason about sequencing.
|
2022-11-18 09:57:31 +00:00
|
|
|
package bundle
|
|
|
|
|
|
|
|
import (
|
2023-08-11 12:28:05 +00:00
|
|
|
"context"
|
2023-03-29 14:36:35 +00:00
|
|
|
"fmt"
|
2022-11-30 13:40:41 +00:00
|
|
|
"os"
|
2022-11-18 09:57:31 +00:00
|
|
|
"path/filepath"
|
2022-11-23 14:20:03 +00:00
|
|
|
"sync"
|
2022-11-18 09:57:31 +00:00
|
|
|
|
2023-05-16 16:35:39 +00:00
|
|
|
"github.com/databricks/cli/bundle/config"
|
2023-09-11 08:18:43 +00:00
|
|
|
"github.com/databricks/cli/bundle/env"
|
Persist deployment metadata in WSFS (#845)
## Changes
This PR introduces a metadata struct that stores a subset of bundle
configuration that we wish to expose to other Databricks services that
wish to integrate with bundles.
This metadata file is uploaded to a file
`${bundle.workspace.state_path}/metadata.json` in the WSFS destination
of the bundle deployment.
Documentation for emitted metadata fields:
* `version`: Version for the metadata file schema
* `config.bundle.git.branch`: Name of the git branch the bundle was
deployed from.
* `config.bundle.git.origin_url`: URL for git remote "origin"
* `config.bundle.git.bundle_root_path`: Relative path of the bundle root
from the root of the git repository. Is set to "." if they are the same.
* `config.bundle.git.commit`: SHA-1 commit hash of the exact commit this
bundle was deployed from. Note, the deployment might not exactly match
this commit version if there are changes that have not been committed to
git at deploy time.
* `file_path`: Path in workspace where we sync bundle files to.
* `resources.jobs.[job-ref].id`: Id of the job
* `resources.jobs.[job-ref].relative_path`: Relative path of the yaml
config file from the bundle root where this job was defined.
Example metadata object when bundle root and git root are the same:
```json
{
"version": 1,
"config": {
"bundle": {
"lock": {},
"git": {
"branch": "master",
"origin_url": "www.host.com",
"commit": "7af8e5d3f5dceffff9295d42d21606ccf056dce0",
"bundle_root_path": "."
}
},
"workspace": {
"file_path": "/Users/shreyas.goenka@databricks.com/.bundle/pipeline-progress/default/files"
},
"resources": {
"jobs": {
"bar": {
"id": "245921165354846",
"relative_path": "databricks.yml"
}
}
},
"sync": {}
}
}
```
Example metadata when the git root is one level above the bundle repo:
```json
{
"version": 1,
"config": {
"bundle": {
"lock": {},
"git": {
"branch": "dev-branch",
"origin_url": "www.my-repo.com",
"commit": "3db46ef750998952b00a2b3e7991e31787e4b98b",
"bundle_root_path": "pipeline-progress"
}
},
"workspace": {
"file_path": "/Users/shreyas.goenka@databricks.com/.bundle/pipeline-progress/default/files"
},
"resources": {
"jobs": {
"bar": {
"id": "245921165354846",
"relative_path": "databricks.yml"
}
}
},
"sync": {}
}
}
```
This unblocks integration to the jobs break glass UI for bundles.
## Tests
Unit tests and integration tests.
2023-10-27 12:55:43 +00:00
|
|
|
"github.com/databricks/cli/bundle/metadata"
|
2024-06-17 09:48:52 +00:00
|
|
|
"github.com/databricks/cli/libs/fileset"
|
2023-05-16 16:35:39 +00:00
|
|
|
"github.com/databricks/cli/libs/locker"
|
2023-08-11 12:28:05 +00:00
|
|
|
"github.com/databricks/cli/libs/log"
|
2023-10-02 06:58:51 +00:00
|
|
|
"github.com/databricks/cli/libs/tags"
|
2023-05-16 16:35:39 +00:00
|
|
|
"github.com/databricks/cli/libs/terraform"
|
2024-05-30 07:41:50 +00:00
|
|
|
"github.com/databricks/cli/libs/vfs"
|
2022-11-24 20:41:57 +00:00
|
|
|
"github.com/databricks/databricks-sdk-go"
|
2023-03-29 18:46:09 +00:00
|
|
|
sdkconfig "github.com/databricks/databricks-sdk-go/config"
|
2022-12-15 14:12:47 +00:00
|
|
|
"github.com/hashicorp/terraform-exec/tfexec"
|
2022-11-18 09:57:31 +00:00
|
|
|
)
|
|
|
|
|
2023-08-18 08:07:25 +00:00
|
|
|
// internalFolder is the name of the directory, nested inside the bundle's
// cache directory, that holds internal bundle files (see InternalDir).
const internalFolder = ".internal"
|
|
|
|
|
2022-11-18 09:57:31 +00:00
|
|
|
type Bundle struct {
|
2024-09-27 10:03:05 +00:00
|
|
|
// BundleRootPath is the local path to the root directory of the bundle.
|
2024-03-27 09:03:24 +00:00
|
|
|
// It is set when we instantiate a new bundle instance.
|
2024-09-27 10:03:05 +00:00
|
|
|
BundleRootPath string
|
2024-03-27 09:03:24 +00:00
|
|
|
|
2024-09-27 10:03:05 +00:00
|
|
|
// BundleRoot is a virtual filesystem path to [BundleRootPath].
|
2024-07-03 10:13:22 +00:00
|
|
|
// Exclusively use this field for filesystem operations.
|
|
|
|
BundleRoot vfs.Path
|
|
|
|
|
2024-08-21 15:33:25 +00:00
|
|
|
// SyncRootPath is the local path to the root directory of files that are synchronized to the workspace.
|
2024-09-27 10:03:05 +00:00
|
|
|
// By default, it is the same as [BundleRootPath].
|
|
|
|
// If it is different, it must be an ancestor to [BundleRootPath].
|
|
|
|
// That is, [SyncRootPath] must contain [BundleRootPath].
|
2024-08-21 15:33:25 +00:00
|
|
|
SyncRootPath string
|
|
|
|
|
2024-09-27 10:03:05 +00:00
|
|
|
// SyncRoot is a virtual filesystem path to [SyncRootPath].
|
|
|
|
// Exclusively use this field for filesystem operations.
|
|
|
|
SyncRoot vfs.Path
|
|
|
|
|
2024-12-05 10:13:13 +00:00
|
|
|
// Path to the root of git worktree containing the bundle.
|
|
|
|
// https://git-scm.com/docs/git-worktree
|
|
|
|
WorktreeRoot vfs.Path
|
|
|
|
|
2024-09-27 10:03:05 +00:00
|
|
|
// Config contains the bundle configuration.
|
|
|
|
// It is loaded from the bundle configuration files and mutators may update it.
|
2022-11-18 09:57:31 +00:00
|
|
|
Config config.Root
|
2022-11-23 14:20:03 +00:00
|
|
|
|
Persist deployment metadata in WSFS (#845)
## Changes
This PR introduces a metadata struct that stores a subset of bundle
configuration that we wish to expose to other Databricks services that
wish to integrate with bundles.
This metadata file is uploaded to a file
`${bundle.workspace.state_path}/metadata.json` in the WSFS destination
of the bundle deployment.
Documentation for emitted metadata fields:
* `version`: Version for the metadata file schema
* `config.bundle.git.branch`: Name of the git branch the bundle was
deployed from.
* `config.bundle.git.origin_url`: URL for git remote "origin"
* `config.bundle.git.bundle_root_path`: Relative path of the bundle root
from the root of the git repository. Is set to "." if they are the same.
* `config.bundle.git.commit`: SHA-1 commit hash of the exact commit this
bundle was deployed from. Note, the deployment might not exactly match
this commit version if there are changes that have not been committed to
git at deploy time,
* `file_path`: Path in workspace where we sync bundle files to.
* `resources.jobs.[job-ref].id`: Id of the job
* `resources.jobs.[job-ref].relative_path`: Relative path of the yaml
config file from the bundle root where this job was defined.
Example metadata object when bundle root and git root are the same:
```json
{
"version": 1,
"config": {
"bundle": {
"lock": {},
"git": {
"branch": "master",
"origin_url": "www.host.com",
"commit": "7af8e5d3f5dceffff9295d42d21606ccf056dce0",
"bundle_root_path": "."
}
},
"workspace": {
"file_path": "/Users/shreyas.goenka@databricks.com/.bundle/pipeline-progress/default/files"
},
"resources": {
"jobs": {
"bar": {
"id": "245921165354846",
"relative_path": "databricks.yml"
}
}
},
"sync": {}
}
}
```
Example metadata when the git root is one level above the bundle repo:
```json
{
"version": 1,
"config": {
"bundle": {
"lock": {},
"git": {
"branch": "dev-branch",
"origin_url": "www.my-repo.com",
"commit": "3db46ef750998952b00a2b3e7991e31787e4b98b",
"bundle_root_path": "pipeline-progress"
}
},
"workspace": {
"file_path": "/Users/shreyas.goenka@databricks.com/.bundle/pipeline-progress/default/files"
},
"resources": {
"jobs": {
"bar": {
"id": "245921165354846",
"relative_path": "databricks.yml"
}
}
},
"sync": {}
}
}
```
This unblocks integration to the jobs break glass UI for bundles.
## Tests
Unit tests and integration tests.
2023-10-27 12:55:43 +00:00
|
|
|
// Metadata about the bundle deployment. This is the interface Databricks services
|
|
|
|
// rely on to integrate with bundles when they need additional information about
|
|
|
|
// a bundle deployment.
|
|
|
|
//
|
|
|
|
// After deploy, a file containing the metadata (metadata.json) can be found
|
|
|
|
// in the WSFS location containing the bundle state.
|
|
|
|
Metadata metadata.Metadata
|
|
|
|
|
2022-11-23 14:20:03 +00:00
|
|
|
// Store a pointer to the workspace client.
|
|
|
|
// It can be initialized on demand after loading the configuration.
|
|
|
|
clientOnce sync.Once
|
2022-11-24 20:41:57 +00:00
|
|
|
client *databricks.WorkspaceClient
|
2022-12-15 14:12:47 +00:00
|
|
|
|
2024-06-17 09:48:52 +00:00
|
|
|
// Files that are synced to the workspace.file_path
|
|
|
|
Files []fileset.File
|
|
|
|
|
2022-12-15 14:12:47 +00:00
|
|
|
// Stores an initialized copy of this bundle's Terraform wrapper.
|
|
|
|
Terraform *tfexec.Terraform
|
2023-03-22 15:37:26 +00:00
|
|
|
|
|
|
|
// Stores the locker responsible for acquiring/releasing a deployment lock.
|
|
|
|
Locker *locker.Locker
|
2023-04-06 10:54:58 +00:00
|
|
|
|
|
|
|
Plan *terraform.Plan
|
|
|
|
|
|
|
|
// if true, we skip approval checks for deploy, destroy resources and delete
|
|
|
|
// files
|
|
|
|
AutoApprove bool
|
2023-10-02 06:58:51 +00:00
|
|
|
|
|
|
|
// Tagging is used to normalize tag keys and values.
|
|
|
|
// The implementation depends on the cloud being targeted.
|
|
|
|
Tagging tags.Cloud
|
2022-11-18 09:57:31 +00:00
|
|
|
}
|
|
|
|
|
2023-08-11 12:28:05 +00:00
|
|
|
func Load(ctx context.Context, path string) (*Bundle, error) {
|
2024-03-27 09:03:24 +00:00
|
|
|
b := &Bundle{
|
2024-09-27 10:03:05 +00:00
|
|
|
BundleRootPath: filepath.Clean(path),
|
|
|
|
BundleRoot: vfs.MustNew(path),
|
2024-03-27 09:03:24 +00:00
|
|
|
}
|
2023-07-18 10:16:34 +00:00
|
|
|
configFile, err := config.FileNames.FindInPath(path)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2024-09-27 10:03:05 +00:00
|
|
|
log.Debugf(ctx, "Found bundle root at %s (file %s)", b.BundleRootPath, configFile)
|
2023-11-15 14:03:36 +00:00
|
|
|
return b, nil
|
2022-11-18 09:57:31 +00:00
|
|
|
}
|
2022-11-21 14:39:53 +00:00
|
|
|
|
2023-01-27 15:57:39 +00:00
|
|
|
// MustLoad returns a bundle configuration.
|
|
|
|
// It returns an error if a bundle was not found or could not be loaded.
|
2023-08-11 12:28:05 +00:00
|
|
|
func MustLoad(ctx context.Context) (*Bundle, error) {
|
2023-09-11 08:18:43 +00:00
|
|
|
root, err := mustGetRoot(ctx)
|
2022-11-21 14:39:53 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2023-01-27 15:57:39 +00:00
|
|
|
|
2023-08-11 12:28:05 +00:00
|
|
|
return Load(ctx, root)
|
2023-01-27 15:57:39 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// TryLoad returns a bundle configuration if there is one, but doesn't fail if there isn't one.
|
|
|
|
// It returns an error if a bundle was found but could not be loaded.
|
|
|
|
// It returns a `nil` bundle if a bundle was not found.
|
2023-08-11 12:28:05 +00:00
|
|
|
func TryLoad(ctx context.Context) (*Bundle, error) {
|
2023-09-11 08:18:43 +00:00
|
|
|
root, err := tryGetRoot(ctx)
|
2023-01-27 15:57:39 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// No root is fine in this function.
|
|
|
|
if root == "" {
|
|
|
|
return nil, nil
|
|
|
|
}
|
2022-11-21 14:39:53 +00:00
|
|
|
|
2023-08-11 12:28:05 +00:00
|
|
|
return Load(ctx, root)
|
2022-11-21 14:39:53 +00:00
|
|
|
}
|
|
|
|
|
2023-11-30 14:28:01 +00:00
|
|
|
func (b *Bundle) InitializeWorkspaceClient() (*databricks.WorkspaceClient, error) {
|
|
|
|
client, err := b.Config.Workspace.Client()
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("cannot resolve bundle auth configuration: %w", err)
|
|
|
|
}
|
|
|
|
return client, nil
|
|
|
|
}
|
|
|
|
|
2022-11-24 20:41:57 +00:00
|
|
|
func (b *Bundle) WorkspaceClient() *databricks.WorkspaceClient {
|
2022-11-23 14:20:03 +00:00
|
|
|
b.clientOnce.Do(func() {
|
2022-11-24 20:41:57 +00:00
|
|
|
var err error
|
2023-11-30 14:28:01 +00:00
|
|
|
b.client, err = b.InitializeWorkspaceClient()
|
2022-11-24 20:41:57 +00:00
|
|
|
if err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
2022-11-23 14:20:03 +00:00
|
|
|
})
|
|
|
|
return b.client
|
|
|
|
}
|
2022-11-30 13:40:41 +00:00
|
|
|
|
2024-01-19 14:12:58 +00:00
|
|
|
// SetWorkpaceClient sets the workspace client for this bundle.
|
|
|
|
// This is used to inject a mock client for testing.
|
|
|
|
func (b *Bundle) SetWorkpaceClient(w *databricks.WorkspaceClient) {
|
|
|
|
b.clientOnce.Do(func() {})
|
|
|
|
b.client = w
|
|
|
|
}
|
|
|
|
|
2022-11-30 13:40:41 +00:00
|
|
|
// CacheDir returns directory to use for temporary files for this bundle.
|
2023-08-17 15:22:32 +00:00
|
|
|
// Scoped to the bundle's target.
|
2023-09-11 08:18:43 +00:00
|
|
|
func (b *Bundle) CacheDir(ctx context.Context, paths ...string) (string, error) {
|
2023-08-17 15:22:32 +00:00
|
|
|
if b.Config.Bundle.Target == "" {
|
|
|
|
panic("target not set")
|
2022-11-30 13:40:41 +00:00
|
|
|
}
|
|
|
|
|
2023-09-11 08:18:43 +00:00
|
|
|
cacheDirName, exists := env.TempDir(ctx)
|
2023-06-21 07:53:54 +00:00
|
|
|
if !exists || cacheDirName == "" {
|
|
|
|
cacheDirName = filepath.Join(
|
|
|
|
// Anchor at bundle root directory.
|
2024-09-27 10:03:05 +00:00
|
|
|
b.BundleRootPath,
|
2023-06-21 07:53:54 +00:00
|
|
|
// Static cache directory.
|
|
|
|
".databricks",
|
|
|
|
"bundle",
|
|
|
|
)
|
|
|
|
}
|
|
|
|
|
2022-12-15 16:30:33 +00:00
|
|
|
// Fixed components of the result path.
|
|
|
|
parts := []string{
|
|
|
|
cacheDirName,
|
2023-08-17 15:22:32 +00:00
|
|
|
// Scope with target name.
|
|
|
|
b.Config.Bundle.Target,
|
2022-12-15 16:30:33 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Append dynamic components of the result path.
|
|
|
|
parts = append(parts, paths...)
|
|
|
|
|
2022-11-30 13:40:41 +00:00
|
|
|
// Make directory if it doesn't exist yet.
|
2022-12-15 16:30:33 +00:00
|
|
|
dir := filepath.Join(parts...)
|
2024-12-12 09:28:42 +00:00
|
|
|
err := os.MkdirAll(dir, 0o700)
|
2022-11-30 13:40:41 +00:00
|
|
|
if err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
|
|
|
|
return dir, nil
|
|
|
|
}
|
2023-03-29 14:36:35 +00:00
|
|
|
|
2023-08-18 08:07:25 +00:00
|
|
|
// This directory is used to store and automaticaly sync internal bundle files, such as, f.e
|
|
|
|
// notebook trampoline files for Python wheel and etc.
|
2023-09-11 08:18:43 +00:00
|
|
|
func (b *Bundle) InternalDir(ctx context.Context) (string, error) {
|
|
|
|
cacheDir, err := b.CacheDir(ctx)
|
2023-08-18 08:07:25 +00:00
|
|
|
if err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
|
|
|
|
dir := filepath.Join(cacheDir, internalFolder)
|
2024-12-12 09:28:42 +00:00
|
|
|
err = os.MkdirAll(dir, 0o700)
|
2023-08-18 08:07:25 +00:00
|
|
|
if err != nil {
|
|
|
|
return dir, err
|
|
|
|
}
|
|
|
|
|
|
|
|
return dir, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// GetSyncIncludePatterns returns a list of user defined includes
|
|
|
|
// And also adds InternalDir folder to include list for sync command
|
|
|
|
// so this folder is always synced
|
2023-09-11 08:18:43 +00:00
|
|
|
func (b *Bundle) GetSyncIncludePatterns(ctx context.Context) ([]string, error) {
|
|
|
|
internalDir, err := b.InternalDir(ctx)
|
2023-08-18 08:07:25 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2024-09-27 10:03:05 +00:00
|
|
|
internalDirRel, err := filepath.Rel(b.BundleRootPath, internalDir)
|
2023-08-18 08:07:25 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
return append(b.Config.Sync.Include, filepath.ToSlash(filepath.Join(internalDirRel, "*.*"))), nil
|
|
|
|
}
|
|
|
|
|
2023-03-29 18:46:09 +00:00
|
|
|
// AuthEnv returns a map with environment variables and their values
|
|
|
|
// derived from the workspace client configuration that was resolved
|
|
|
|
// in the context of this bundle.
|
|
|
|
//
|
|
|
|
// This map can be used to configure authentication for tools that
|
|
|
|
// we call into from this bundle context.
|
|
|
|
func (b *Bundle) AuthEnv() (map[string]string, error) {
|
|
|
|
if b.client == nil {
|
|
|
|
return nil, fmt.Errorf("workspace client not initialized yet")
|
|
|
|
}
|
|
|
|
|
|
|
|
cfg := b.client.Config
|
|
|
|
out := make(map[string]string)
|
|
|
|
for _, attr := range sdkconfig.ConfigAttributes {
|
|
|
|
// Ignore profile so that downstream tools don't try and reload
|
|
|
|
// the profile even though we know the current configuration is valid.
|
|
|
|
if attr.Name == "profile" {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
if len(attr.EnvVars) == 0 {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
if attr.IsZero(cfg) {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
out[attr.EnvVars[0]] = attr.GetString(cfg)
|
|
|
|
}
|
|
|
|
|
|
|
|
return out, nil
|
|
|
|
}
|