Persist deployment metadata in WSFS (#845)
## Changes

This PR introduces a metadata struct that stores a subset of bundle configuration to expose to other Databricks services that integrate with bundles. This metadata is uploaded to the file `${bundle.workspace.state_path}/metadata.json` in the WSFS destination of the bundle deployment.

Documentation for the emitted metadata fields:

* `version`: Version of the metadata file schema.
* `config.bundle.git.branch`: Name of the git branch the bundle was deployed from.
* `config.bundle.git.origin_url`: URL of the git remote "origin".
* `config.bundle.git.bundle_root_path`: Relative path of the bundle root from the root of the git repository. Set to "." if they are the same.
* `config.bundle.git.commit`: SHA-1 hash of the exact commit this bundle was deployed from. Note that the deployment might not exactly match this commit if there were changes that had not been committed to git at deploy time.
* `file_path`: Path in the workspace where the bundle files are synced to.
* `resources.jobs.[job-ref].id`: ID of the job.
* `resources.jobs.[job-ref].relative_path`: Relative path from the bundle root to the YAML config file where this job is defined.

Example metadata object when the bundle root and git root are the same:

```json
{
  "version": 1,
  "config": {
    "bundle": {
      "lock": {},
      "git": {
        "branch": "master",
        "origin_url": "www.host.com",
        "commit": "7af8e5d3f5dceffff9295d42d21606ccf056dce0",
        "bundle_root_path": "."
      }
    },
    "workspace": {
      "file_path": "/Users/shreyas.goenka@databricks.com/.bundle/pipeline-progress/default/files"
    },
    "resources": {
      "jobs": {
        "bar": {
          "id": "245921165354846",
          "relative_path": "databricks.yml"
        }
      }
    },
    "sync": {}
  }
}
```

Example metadata when the git root is one level above the bundle root:

```json
{
  "version": 1,
  "config": {
    "bundle": {
      "lock": {},
      "git": {
        "branch": "dev-branch",
        "origin_url": "www.my-repo.com",
        "commit": "3db46ef750998952b00a2b3e7991e31787e4b98b",
        "bundle_root_path": "pipeline-progress"
      }
    },
    "workspace": {
      "file_path": "/Users/shreyas.goenka@databricks.com/.bundle/pipeline-progress/default/files"
    },
    "resources": {
      "jobs": {
        "bar": {
          "id": "245921165354846",
          "relative_path": "databricks.yml"
        }
      }
    },
    "sync": {}
  }
}
```

This unblocks integration with the jobs break-glass UI for bundles.

## Tests
Unit tests and integration tests.
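For illustration, here is a minimal sketch (not part of this change) of how an integrating service or script could fetch and decode the uploaded file using the CLI's own `filer` package. The workspace root path is a placeholder, and the sketch assumes the default state path (`<root>/state`):

```go
package main

import (
	"context"
	"encoding/json"
	"fmt"
	"io"

	"github.com/databricks/cli/bundle/metadata"
	"github.com/databricks/cli/libs/filer"
	"github.com/databricks/databricks-sdk-go"
)

func main() {
	ctx := context.Background()
	w, err := databricks.NewWorkspaceClient()
	if err != nil {
		panic(err)
	}

	// Placeholder: root path the bundle was deployed to.
	root := "/Users/someone@example.com/.bundle/my-bundle/default"
	f, err := filer.NewWorkspaceFilesClient(w, root)
	if err != nil {
		panic(err)
	}

	// With the default state path, metadata.json lives under "state/".
	r, err := f.Read(ctx, "state/metadata.json")
	if err != nil {
		panic(err)
	}
	raw, err := io.ReadAll(r)
	if err != nil {
		panic(err)
	}

	var m metadata.Metadata
	if err := json.Unmarshal(raw, &m); err != nil {
		panic(err)
	}
	fmt.Printf("deployed from commit %s (branch %s), files at %s\n",
		m.Config.Bundle.Git.Commit, m.Config.Bundle.Git.Branch, m.Config.Workspace.FilesPath)
}
```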
parent 905fe10e62
commit 5a8cd0c5bc
```diff
@@ -15,6 +15,7 @@ import (
 	"github.com/databricks/cli/bundle/config"
 	"github.com/databricks/cli/bundle/env"
+	"github.com/databricks/cli/bundle/metadata"
 	"github.com/databricks/cli/folders"
 	"github.com/databricks/cli/libs/git"
 	"github.com/databricks/cli/libs/locker"
```
```diff
@@ -31,6 +32,14 @@ const internalFolder = ".internal"
 type Bundle struct {
 	Config config.Root

+	// Metadata about the bundle deployment. This is the interface Databricks services
+	// rely on to integrate with bundles when they need additional information about
+	// a bundle deployment.
+	//
+	// After deploy, a file containing the metadata (metadata.json) can be found
+	// in the WSFS location containing the bundle state.
+	Metadata metadata.Metadata
+
 	// Store a pointer to the workspace client.
 	// It can be initialized on demand after loading the configuration.
 	clientOnce sync.Once
```
```diff
@@ -29,7 +29,7 @@ type Bundle struct {
 	Lock Lock `json:"lock" bundle:"readonly"`

 	// Force-override Git branch validation.
-	Force bool `json:"force" bundle:"readonly"`
+	Force bool `json:"force,omitempty" bundle:"readonly"`

 	// Contains Git information like current commit, current branch and
 	// origin url. Automatically loaded by reading .git directory if not specified
```
```diff
@@ -5,6 +5,9 @@ type Git struct {
 	OriginURL string `json:"origin_url,omitempty"`
 	Commit    string `json:"commit,omitempty" bundle:"readonly"`

+	// Path to bundle root relative to the git repository root.
+	BundleRootPath string `json:"bundle_root_path,omitempty" bundle:"readonly"`
+
 	// Inferred is set to true if the Git details were inferred and weren't set explicitly
 	Inferred bool `json:"-" bundle:"readonly"`
```
```diff
@@ -4,11 +4,11 @@ type Lock struct {
 	// Enabled toggles deployment lock. True by default.
 	// Use a pointer value so that only explicitly configured values are set
 	// and we don't merge configuration with zero-initialized values.
-	Enabled *bool `json:"enabled"`
+	Enabled *bool `json:"enabled,omitempty"`

 	// Force acquisition of deployment lock even if it is currently held.
 	// This may be necessary if a prior deployment failed to release the lock.
-	Force bool `json:"force"`
+	Force bool `json:"force,omitempty"`
 }

 func (lock Lock) IsEnabled() bool {
```
```diff
@@ -2,6 +2,7 @@ package mutator

 import (
 	"context"
+	"path/filepath"

 	"github.com/databricks/cli/bundle"
 	"github.com/databricks/cli/libs/git"
@@ -52,5 +53,17 @@ func (m *loadGitDetails) Apply(ctx context.Context, b *bundle.Bundle) error {
 		remoteUrl := repo.OriginUrl()
 		b.Config.Bundle.Git.OriginURL = remoteUrl
 	}
+
+	// Compute relative path of the bundle root from the Git repo root.
+	absBundlePath, err := filepath.Abs(b.Config.Path)
+	if err != nil {
+		return err
+	}
+	// repo.Root() returns the absolute path of the repo
+	relBundlePath, err := filepath.Rel(repo.Root(), absBundlePath)
+	if err != nil {
+		return err
+	}
+	b.Config.Bundle.Git.BundleRootPath = filepath.ToSlash(relBundlePath)
 	return nil
 }
```
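The two `bundle_root_path` values shown in the PR description fall directly out of `filepath.Rel`; a small standalone illustration of that behavior:

```go
package main

import (
	"fmt"
	"path/filepath"
)

func main() {
	// Bundle root and git root are the same directory: Rel returns ".".
	p, _ := filepath.Rel("/repo", "/repo")
	fmt.Println(p) // .

	// Bundle root nested one level below the git root.
	p, _ = filepath.Rel("/repo", "/repo/pipeline-progress")
	fmt.Println(filepath.ToSlash(p)) // pipeline-progress
}
```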
```diff
@@ -6,8 +6,8 @@ import (
 )

 type Paths struct {
-	// ConfigFilePath holds the path to the configuration file that
-	// described the resource that this type is embedded in.
+	// Absolute path on the local file system to the configuration file that holds
+	// the definition of this resource.
 	ConfigFilePath string `json:"-" bundle:"readonly"`
 }
```
```diff
@@ -0,0 +1,51 @@
+package metadata
+
+import (
+	"context"
+	"fmt"
+	"path/filepath"
+
+	"github.com/databricks/cli/bundle"
+	"github.com/databricks/cli/bundle/metadata"
+)
+
+type compute struct{}
+
+func Compute() bundle.Mutator {
+	return &compute{}
+}
+
+func (m *compute) Name() string {
+	return "metadata.Compute"
+}
+
+func (m *compute) Apply(_ context.Context, b *bundle.Bundle) error {
+	b.Metadata = metadata.Metadata{
+		Version: metadata.Version,
+		Config:  metadata.Config{},
+	}
+
+	// Set git details in metadata
+	b.Metadata.Config.Bundle.Git = b.Config.Bundle.Git
+
+	// Set job config paths in metadata
+	jobsMetadata := make(map[string]*metadata.Job)
+	for name, job := range b.Config.Resources.Jobs {
+		// Compute config file path the job is defined in, relative to the bundle
+		// root
+		relativePath, err := filepath.Rel(b.Config.Path, job.ConfigFilePath)
+		if err != nil {
+			return fmt.Errorf("failed to compute relative path for job %s: %w", name, err)
+		}
+		// Metadata for the job
+		jobsMetadata[name] = &metadata.Job{
+			ID:           job.ID,
+			RelativePath: filepath.ToSlash(relativePath),
+		}
+	}
+	b.Metadata.Config.Resources.Jobs = jobsMetadata
+
+	// Set file upload destination of the bundle in metadata
+	b.Metadata.Config.Workspace.FilesPath = b.Config.Workspace.FilesPath
+	return nil
+}
```
```diff
@@ -0,0 +1,100 @@
+package metadata
+
+import (
+	"context"
+	"testing"
+
+	"github.com/databricks/cli/bundle"
+	"github.com/databricks/cli/bundle/config"
+	"github.com/databricks/cli/bundle/config/paths"
+	"github.com/databricks/cli/bundle/config/resources"
+	"github.com/databricks/cli/bundle/metadata"
+	"github.com/databricks/databricks-sdk-go/service/jobs"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func TestComputeMetadataMutator(t *testing.T) {
+	b := &bundle.Bundle{
+		Config: config.Root{
+			Workspace: config.Workspace{
+				RootPath:      "/Users/shreyas.goenka@databricks.com",
+				ArtifactsPath: "/Users/shreyas.goenka@databricks.com/artifacts",
+				FilesPath:     "/Users/shreyas.goenka@databricks.com/files",
+			},
+			Bundle: config.Bundle{
+				Name:   "my-bundle",
+				Target: "development",
+				Git: config.Git{
+					Branch:         "my-branch",
+					OriginURL:      "www.host.com",
+					Commit:         "abcd",
+					BundleRootPath: "a/b/c/d",
+				},
+			},
+			Resources: config.Resources{
+				Jobs: map[string]*resources.Job{
+					"my-job-1": {
+						Paths: paths.Paths{
+							ConfigFilePath: "a/b/c",
+						},
+						ID: "1111",
+						JobSettings: &jobs.JobSettings{
+							Name: "My Job One",
+						},
+					},
+					"my-job-2": {
+						Paths: paths.Paths{
+							ConfigFilePath: "d/e/f",
+						},
+						ID: "2222",
+						JobSettings: &jobs.JobSettings{
+							Name: "My Job Two",
+						},
+					},
+				},
+				Pipelines: map[string]*resources.Pipeline{
+					"my-pipeline": {
+						Paths: paths.Paths{
+							ConfigFilePath: "abc",
+						},
+					},
+				},
+			},
+		},
+	}
+
+	expectedMetadata := metadata.Metadata{
+		Version: metadata.Version,
+		Config: metadata.Config{
+			Workspace: metadata.Workspace{
+				FilesPath: "/Users/shreyas.goenka@databricks.com/files",
+			},
+			Bundle: metadata.Bundle{
+				Git: config.Git{
+					Branch:         "my-branch",
+					OriginURL:      "www.host.com",
+					Commit:         "abcd",
+					BundleRootPath: "a/b/c/d",
+				},
+			},
+			Resources: metadata.Resources{
+				Jobs: map[string]*metadata.Job{
+					"my-job-1": {
+						RelativePath: "a/b/c",
+						ID:           "1111",
+					},
+					"my-job-2": {
+						RelativePath: "d/e/f",
+						ID:           "2222",
+					},
+				},
+			},
+		},
+	}
+
+	err := Compute().Apply(context.Background(), b)
+	require.NoError(t, err)
+
+	assert.Equal(t, expectedMetadata, b.Metadata)
+}
```
```diff
@@ -0,0 +1,36 @@
+package metadata
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+
+	"github.com/databricks/cli/bundle"
+	"github.com/databricks/cli/libs/filer"
+)
+
+const MetadataFileName = "metadata.json"
+
+type upload struct{}
+
+func Upload() bundle.Mutator {
+	return &upload{}
+}
+
+func (m *upload) Name() string {
+	return "metadata.Upload"
+}
+
+func (m *upload) Apply(ctx context.Context, b *bundle.Bundle) error {
+	f, err := filer.NewWorkspaceFilesClient(b.WorkspaceClient(), b.Config.Workspace.StatePath)
+	if err != nil {
+		return err
+	}
+
+	metadata, err := json.MarshalIndent(b.Metadata, "", "  ")
+	if err != nil {
+		return err
+	}
+
+	return f.Write(ctx, MetadataFileName, bytes.NewReader(metadata), filer.CreateParentDirectories, filer.OverwriteIfExists)
+}
```
```diff
@@ -0,0 +1,45 @@
+package metadata
+
+import (
+	"github.com/databricks/cli/bundle/config"
+)
+
+const Version = 1
+
+type Bundle struct {
+	Git config.Git `json:"git,omitempty"`
+}
+
+type Workspace struct {
+	FilesPath string `json:"file_path,omitempty"`
+}
+
+type Job struct {
+	ID string `json:"id,omitempty"`
+
+	// Relative path from the bundle root to the configuration file that holds
+	// the definition of this resource.
+	RelativePath string `json:"relative_path,omitempty"`
+}
+
+type Resources struct {
+	Jobs map[string]*Job `json:"jobs,omitempty"`
+}
+
+type Config struct {
+	Bundle    Bundle    `json:"bundle,omitempty"`
+	Workspace Workspace `json:"workspace,omitempty"`
+	Resources Resources `json:"resources,omitempty"`
+}
+
+// Metadata about the bundle deployment. This is the interface Databricks services
+// rely on to integrate with bundles when they need additional information about
+// a bundle deployment.
+//
+// After deploy, a file containing the metadata (metadata.json) can be found
+// in the WSFS location containing the bundle state.
+type Metadata struct {
+	Version int `json:"version"`
+
+	Config Config `json:"config"`
+}
```
```diff
@@ -7,6 +7,7 @@ import (
 	"github.com/databricks/cli/bundle/config/mutator"
 	"github.com/databricks/cli/bundle/deploy/files"
 	"github.com/databricks/cli/bundle/deploy/lock"
+	"github.com/databricks/cli/bundle/deploy/metadata"
 	"github.com/databricks/cli/bundle/deploy/terraform"
 	"github.com/databricks/cli/bundle/libraries"
 	"github.com/databricks/cli/bundle/python"
@@ -31,7 +32,12 @@ func Deploy() bundle.Mutator {
 		terraform.StatePull(),
 		bundle.Defer(
 			terraform.Apply(),
-			terraform.StatePush(),
+			bundle.Seq(
+				terraform.StatePush(),
+				terraform.Load(),
+				metadata.Compute(),
+				metadata.Upload(),
+			),
 		),
 	),
 	lock.Release(lock.GoalDeploy),
```
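For orientation, the new mutators can also be exercised on their own. A minimal sketch, assuming `bundle.Apply(ctx, b, mutator)` keeps its usual signature in this codebase and that a deployment has already happened; grouping the steps in `bundle.Seq` mirrors how they run as a single unit in the deferred slot after `terraform.Apply()`:

```go
// Hypothetical standalone run of the metadata steps against a loaded bundle.
// terraform.Load() runs first so deployed resource IDs (e.g. job IDs) are
// hydrated before metadata.Compute() snapshots them.
err := bundle.Apply(ctx, b, bundle.Seq(
	terraform.Load(),   // hydrate resource IDs from terraform state
	metadata.Compute(), // populate b.Metadata from the resolved configuration
	metadata.Upload(),  // write metadata.json to ${workspace.state_path} in WSFS
))
if err != nil {
	// handle error
}
```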
```diff
@@ -2,7 +2,7 @@
 	"properties": {
 		"unique_id": {
 			"type": "string",
-			"description": "Unique ID for job name"
+			"description": "Unique ID for pipeline name"
 		}
 	}
 }
```
```diff
@@ -0,0 +1,16 @@
+{
+  "properties": {
+    "unique_id": {
+      "type": "string",
+      "description": "Unique ID for job name"
+    },
+    "spark_version": {
+      "type": "string",
+      "description": "Spark version used for job cluster"
+    },
+    "node_type_id": {
+      "type": "string",
+      "description": "Node type id for job cluster"
+    }
+  }
+}
```
```diff
@@ -0,0 +1,2 @@
+# Databricks notebook source
+print("bye")
```
```diff
@@ -0,0 +1,12 @@
+resources:
+  jobs:
+    bar:
+      name: test-job-metadata-2-{{.unique_id}}
+      tasks:
+        - task_key: my_notebook_task
+          new_cluster:
+            num_workers: 1
+            spark_version: "{{.spark_version}}"
+            node_type_id: "{{.node_type_id}}"
+          notebook_task:
+            notebook_path: "./bar.py"
```
```diff
@@ -0,0 +1,21 @@
+bundle:
+  name: job-metadata
+
+workspace:
+  root_path: "~/.bundle/{{.unique_id}}"
+
+include:
+  - "a/b/*.yml"
+
+resources:
+  jobs:
+    foo:
+      name: test-job-metadata-1-{{.unique_id}}
+      tasks:
+        - task_key: my_notebook_task
+          new_cluster:
+            num_workers: 1
+            spark_version: "{{.spark_version}}"
+            node_type_id: "{{.node_type_id}}"
+          notebook_task:
+            notebook_path: "./foo.py"
```
```diff
@@ -0,0 +1,2 @@
+# Databricks notebook source
+print("hello")
```
```diff
@@ -0,0 +1,105 @@
+package bundle
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"path"
+	"strconv"
+	"testing"
+
+	"github.com/databricks/cli/bundle/config"
+	"github.com/databricks/cli/bundle/metadata"
+	"github.com/databricks/cli/internal"
+	"github.com/databricks/cli/libs/filer"
+	"github.com/databricks/databricks-sdk-go"
+	"github.com/google/uuid"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func TestAccJobsMetadataFile(t *testing.T) {
+	env := internal.GetEnvOrSkipTest(t, "CLOUD_ENV")
+	t.Log(env)
+
+	w, err := databricks.NewWorkspaceClient()
+	require.NoError(t, err)
+
+	nodeTypeId := internal.GetNodeTypeId(env)
+	uniqueId := uuid.New().String()
+	bundleRoot, err := initTestTemplate(t, "job_metadata", map[string]any{
+		"unique_id":     uniqueId,
+		"node_type_id":  nodeTypeId,
+		"spark_version": "13.2.x-snapshot-scala2.12",
+	})
+	require.NoError(t, err)
+
+	// deploy bundle
+	err = deployBundle(t, bundleRoot)
+	require.NoError(t, err)
+
+	// Cleanup the deployed bundle
+	t.Cleanup(func() {
+		err = destroyBundle(t, bundleRoot)
+		require.NoError(t, err)
+	})
+
+	// assert job 1 is created
+	jobName := "test-job-metadata-1-" + uniqueId
+	job1, err := w.Jobs.GetBySettingsName(context.Background(), jobName)
+	require.NoError(t, err)
+	assert.Equal(t, job1.Settings.Name, jobName)
+
+	// assert job 2 is created
+	jobName = "test-job-metadata-2-" + uniqueId
+	job2, err := w.Jobs.GetBySettingsName(context.Background(), jobName)
+	require.NoError(t, err)
+	assert.Equal(t, job2.Settings.Name, jobName)
+
+	// Compute root path for the bundle deployment
+	me, err := w.CurrentUser.Me(context.Background())
+	require.NoError(t, err)
+	root := fmt.Sprintf("/Users/%s/.bundle/%s", me.UserName, uniqueId)
+	f, err := filer.NewWorkspaceFilesClient(w, root)
+	require.NoError(t, err)
+
+	// Read metadata object from the workspace
+	r, err := f.Read(context.Background(), "state/metadata.json")
+	require.NoError(t, err)
+	b, err := io.ReadAll(r)
+	require.NoError(t, err)
+	actualMetadata := metadata.Metadata{}
+	err = json.Unmarshal(b, &actualMetadata)
+	require.NoError(t, err)
+
+	// expected value for the metadata
+	expectedMetadata := metadata.Metadata{
+		Version: metadata.Version,
+		Config: metadata.Config{
+			Bundle: metadata.Bundle{
+				Git: config.Git{
+					BundleRootPath: ".",
+				},
+			},
+			Workspace: metadata.Workspace{
+				FilesPath: path.Join(root, "files"),
+			},
+			Resources: metadata.Resources{
+				Jobs: map[string]*metadata.Job{
+					"foo": {
+						ID:           strconv.FormatInt(job1.JobId, 10),
+						RelativePath: "databricks.yml",
+					},
+					"bar": {
+						ID:           strconv.FormatInt(job2.JobId, 10),
+						RelativePath: "a/b/resources.yml",
+					},
+				},
+			},
+		},
+	}
+
+	// Assert metadata matches what we expected.
+	assert.Equal(t, expectedMetadata, actualMetadata)
+}
```
```diff
@@ -40,7 +40,7 @@ type Repository struct {
 	config *config
 }

-// Root returns the repository root.
+// Root returns the absolute path to the repository root.
 func (r *Repository) Root() string {
 	return r.rootPath
 }
```