# Persist deployment metadata in WSFS (#845)

## Changes

This PR introduces a metadata struct that stores a subset of bundle
configuration that we want to expose to other Databricks services that
integrate with bundles.

This metadata is uploaded to `${bundle.workspace.state_path}/metadata.json`
in the WSFS destination of the bundle deployment.
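
A consuming service can read the file back with the same `filer` client this PR uses for the upload. A minimal sketch (the state path below is a hypothetical example; the real value is `${bundle.workspace.state_path}`):

```go
package main

import (
	"context"
	"encoding/json"
	"fmt"
	"io"

	"github.com/databricks/cli/bundle/metadata"
	"github.com/databricks/cli/libs/filer"
	"github.com/databricks/databricks-sdk-go"
)

func main() {
	ctx := context.Background()
	w := databricks.Must(databricks.NewWorkspaceClient())

	// Hypothetical state path; in practice this comes from the bundle's
	// workspace configuration.
	f, err := filer.NewWorkspaceFilesClient(w, "/Users/someone@example.com/.bundle/my-bundle/default/state")
	if err != nil {
		panic(err)
	}

	// Read and decode metadata.json from the bundle state location.
	r, err := f.Read(ctx, "metadata.json")
	if err != nil {
		panic(err)
	}
	defer r.Close()

	b, err := io.ReadAll(r)
	if err != nil {
		panic(err)
	}

	var m metadata.Metadata
	if err := json.Unmarshal(b, &m); err != nil {
		panic(err)
	}
	fmt.Println(m.Config.Bundle.Git.Commit)
}
```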

Documentation for emitted metadata fields:
* `version`: Version of the metadata file schema.
* `config.bundle.git.branch`: Name of the git branch the bundle was
deployed from.
* `config.bundle.git.origin_url`: URL of the git remote "origin".
* `config.bundle.git.bundle_root_path`: Relative path of the bundle root
from the root of the git repository. Set to "." if they are the same.
* `config.bundle.git.commit`: SHA-1 hash of the exact commit this bundle
was deployed from. Note that the deployment might not exactly match this
commit if there were uncommitted changes at deploy time.
* `file_path`: Path in the workspace that bundle files are synced to.
* `resources.jobs.[job-ref].id`: ID of the job.
* `resources.jobs.[job-ref].relative_path`: Relative path from the bundle
root to the YAML config file in which this job is defined.

Example metadata object when bundle root and git root are the same:
```json
{
  "version": 1,
  "config": {
    "bundle": {
      "lock": {},
      "git": {
        "branch": "master",
        "origin_url": "www.host.com",
        "commit": "7af8e5d3f5dceffff9295d42d21606ccf056dce0",
        "bundle_root_path": "."
      }
    },
    "workspace": {
      "file_path": "/Users/shreyas.goenka@databricks.com/.bundle/pipeline-progress/default/files"
    },
    "resources": {
      "jobs": {
        "bar": {
          "id": "245921165354846",
          "relative_path": "databricks.yml"
        }
      }
    },
    "sync": {}
  }
}
```

Example metadata when the git root is one level above the bundle root:
```json
{
  "version": 1,
  "config": {
    "bundle": {
      "lock": {},
      "git": {
        "branch": "dev-branch",
        "origin_url": "www.my-repo.com",
        "commit": "3db46ef750998952b00a2b3e7991e31787e4b98b",
        "bundle_root_path": "pipeline-progress"
      }
    },
    "workspace": {
      "file_path": "/Users/shreyas.goenka@databricks.com/.bundle/pipeline-progress/default/files"
    },
    "resources": {
      "jobs": {
        "bar": {
          "id": "245921165354846",
          "relative_path": "databricks.yml"
        }
      }
    },
    "sync": {}
  }
}
```


This unblocks integration with the jobs break-glass UI for bundles.

## Tests
Unit tests and integration tests.
Commit 5a8cd0c5bc (parent 905fe10e62), authored by shreyas-goenka on 2023-10-27, committed by GitHub. 19 changed files with 429 additions and 8 deletions.

```diff
@@ -15,6 +15,7 @@ import (
 	"github.com/databricks/cli/bundle/config"
 	"github.com/databricks/cli/bundle/env"
+	"github.com/databricks/cli/bundle/metadata"
 	"github.com/databricks/cli/folders"
 	"github.com/databricks/cli/libs/git"
 	"github.com/databricks/cli/libs/locker"
@@ -31,6 +32,14 @@ const internalFolder = ".internal"
 type Bundle struct {
 	Config config.Root

+	// Metadata about the bundle deployment. This is the interface Databricks services
+	// rely on to integrate with bundles when they need additional information about
+	// a bundle deployment.
+	//
+	// After deploy, a file containing the metadata (metadata.json) can be found
+	// in the WSFS location containing the bundle state.
+	Metadata metadata.Metadata
+
 	// Store a pointer to the workspace client.
 	// It can be initialized on demand after loading the configuration.
 	clientOnce sync.Once
```

```diff
@@ -29,7 +29,7 @@ type Bundle struct {
 	Lock Lock `json:"lock" bundle:"readonly"`

 	// Force-override Git branch validation.
-	Force bool `json:"force" bundle:"readonly"`
+	Force bool `json:"force,omitempty" bundle:"readonly"`

 	// Contains Git information like current commit, current branch and
 	// origin url. Automatically loaded by reading .git directory if not specified
```

```diff
@@ -5,6 +5,9 @@ type Git struct {
 	OriginURL string `json:"origin_url,omitempty"`
 	Commit    string `json:"commit,omitempty" bundle:"readonly"`

+	// Path to bundle root relative to the git repository root.
+	BundleRootPath string `json:"bundle_root_path,omitempty" bundle:"readonly"`
+
 	// Inferred is set to true if the Git details were inferred and weren't set explicitly
 	Inferred bool `json:"-" bundle:"readonly"`
```

```diff
@@ -4,11 +4,11 @@ type Lock struct {
 	// Enabled toggles deployment lock. True by default.
 	// Use a pointer value so that only explicitly configured values are set
 	// and we don't merge configuration with zero-initialized values.
-	Enabled *bool `json:"enabled"`
+	Enabled *bool `json:"enabled,omitempty"`

 	// Force acquisition of deployment lock even if it is currently held.
 	// This may be necessary if a prior deployment failed to release the lock.
-	Force bool `json:"force"`
+	Force bool `json:"force,omitempty"`
 }

 func (lock Lock) IsEnabled() bool {
```
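
The `omitempty` flips above change how unset values serialize; for the pointer-typed `Enabled` field in particular, a nil pointer previously marshaled as `"enabled": null`. A minimal sketch of the difference (standalone types, not the actual `Lock` struct):

```go
package main

import (
	"encoding/json"
	"fmt"
)

type withoutOmit struct {
	Enabled *bool `json:"enabled"`
}

type withOmit struct {
	Enabled *bool `json:"enabled,omitempty"`
}

func main() {
	// A nil pointer without omitempty serializes as an explicit null.
	a, _ := json.Marshal(withoutOmit{}) // {"enabled":null}
	// With omitempty, the field is dropped entirely.
	b, _ := json.Marshal(withOmit{}) // {}
	fmt.Println(string(a), string(b))
}
```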

```diff
@@ -2,6 +2,7 @@ package mutator

 import (
 	"context"
+	"path/filepath"

 	"github.com/databricks/cli/bundle"
 	"github.com/databricks/cli/libs/git"
@@ -52,5 +53,17 @@ func (m *loadGitDetails) Apply(ctx context.Context, b *bundle.Bundle) error {
 		remoteUrl := repo.OriginUrl()
 		b.Config.Bundle.Git.OriginURL = remoteUrl
 	}
+
+	// Compute relative path of the bundle root from the Git repo root.
+	absBundlePath, err := filepath.Abs(b.Config.Path)
+	if err != nil {
+		return err
+	}
+	// repo.Root() returns the absolute path of the repo
+	relBundlePath, err := filepath.Rel(repo.Root(), absBundlePath)
+	if err != nil {
+		return err
+	}
+	b.Config.Bundle.Git.BundleRootPath = filepath.ToSlash(relBundlePath)
 	return nil
 }
```
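
For intuition on the `bundle_root_path` values in the PR description ("." vs. a nested path), a quick sketch of how `filepath.Rel` behaves, using hypothetical paths:

```go
package main

import (
	"fmt"
	"path/filepath"
)

func main() {
	// Bundle root == git root: the relative path is ".".
	rel, _ := filepath.Rel("/repo", "/repo")
	fmt.Println(filepath.ToSlash(rel)) // "."

	// Bundle root one level below the git root.
	rel, _ = filepath.Rel("/repo", "/repo/pipeline-progress")
	fmt.Println(filepath.ToSlash(rel)) // "pipeline-progress"
}
```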

```diff
@@ -6,8 +6,8 @@ import (
 )

 type Paths struct {
-	// ConfigFilePath holds the path to the configuration file that
-	// described the resource that this type is embedded in.
+	// Absolute path on the local file system to the configuration file that holds
+	// the definition of this resource.
 	ConfigFilePath string `json:"-" bundle:"readonly"`
 }
```

New file — the `metadata.Compute()` mutator, which populates `b.Metadata` from the bundle configuration:

```go
package metadata

import (
	"context"
	"fmt"
	"path/filepath"

	"github.com/databricks/cli/bundle"
	"github.com/databricks/cli/bundle/metadata"
)

type compute struct{}

func Compute() bundle.Mutator {
	return &compute{}
}

func (m *compute) Name() string {
	return "metadata.Compute"
}

func (m *compute) Apply(_ context.Context, b *bundle.Bundle) error {
	b.Metadata = metadata.Metadata{
		Version: metadata.Version,
		Config:  metadata.Config{},
	}

	// Set git details in metadata
	b.Metadata.Config.Bundle.Git = b.Config.Bundle.Git

	// Set job config paths in metadata
	jobsMetadata := make(map[string]*metadata.Job)
	for name, job := range b.Config.Resources.Jobs {
		// Compute config file path the job is defined in, relative to the bundle
		// root
		relativePath, err := filepath.Rel(b.Config.Path, job.ConfigFilePath)
		if err != nil {
			return fmt.Errorf("failed to compute relative path for job %s: %w", name, err)
		}
		// Metadata for the job
		jobsMetadata[name] = &metadata.Job{
			ID:           job.ID,
			RelativePath: filepath.ToSlash(relativePath),
		}
	}
	b.Metadata.Config.Resources.Jobs = jobsMetadata

	// Set file upload destination of the bundle in metadata
	b.Metadata.Config.Workspace.FilesPath = b.Config.Workspace.FilesPath
	return nil
}
```

New file — unit test for the compute mutator:

```go
package metadata

import (
	"context"
	"testing"

	"github.com/databricks/cli/bundle"
	"github.com/databricks/cli/bundle/config"
	"github.com/databricks/cli/bundle/config/paths"
	"github.com/databricks/cli/bundle/config/resources"
	"github.com/databricks/cli/bundle/metadata"
	"github.com/databricks/databricks-sdk-go/service/jobs"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

func TestComputeMetadataMutator(t *testing.T) {
	b := &bundle.Bundle{
		Config: config.Root{
			Workspace: config.Workspace{
				RootPath:      "/Users/shreyas.goenka@databricks.com",
				ArtifactsPath: "/Users/shreyas.goenka@databricks.com/artifacts",
				FilesPath:     "/Users/shreyas.goenka@databricks.com/files",
			},
			Bundle: config.Bundle{
				Name:   "my-bundle",
				Target: "development",
				Git: config.Git{
					Branch:         "my-branch",
					OriginURL:      "www.host.com",
					Commit:         "abcd",
					BundleRootPath: "a/b/c/d",
				},
			},
			Resources: config.Resources{
				Jobs: map[string]*resources.Job{
					"my-job-1": {
						Paths: paths.Paths{
							ConfigFilePath: "a/b/c",
						},
						ID: "1111",
						JobSettings: &jobs.JobSettings{
							Name: "My Job One",
						},
					},
					"my-job-2": {
						Paths: paths.Paths{
							ConfigFilePath: "d/e/f",
						},
						ID: "2222",
						JobSettings: &jobs.JobSettings{
							Name: "My Job Two",
						},
					},
				},
				Pipelines: map[string]*resources.Pipeline{
					"my-pipeline": {
						Paths: paths.Paths{
							ConfigFilePath: "abc",
						},
					},
				},
			},
		},
	}

	expectedMetadata := metadata.Metadata{
		Version: metadata.Version,
		Config: metadata.Config{
			Workspace: metadata.Workspace{
				FilesPath: "/Users/shreyas.goenka@databricks.com/files",
			},
			Bundle: metadata.Bundle{
				Git: config.Git{
					Branch:         "my-branch",
					OriginURL:      "www.host.com",
					Commit:         "abcd",
					BundleRootPath: "a/b/c/d",
				},
			},
			Resources: metadata.Resources{
				Jobs: map[string]*metadata.Job{
					"my-job-1": {
						RelativePath: "a/b/c",
						ID:           "1111",
					},
					"my-job-2": {
						RelativePath: "d/e/f",
						ID:           "2222",
					},
				},
			},
		},
	}

	err := Compute().Apply(context.Background(), b)
	require.NoError(t, err)
	assert.Equal(t, expectedMetadata, b.Metadata)
}
```

New file — the `metadata.Upload()` mutator, which writes the serialized metadata to the bundle state path:

```go
package metadata

import (
	"bytes"
	"context"
	"encoding/json"

	"github.com/databricks/cli/bundle"
	"github.com/databricks/cli/libs/filer"
)

const MetadataFileName = "metadata.json"

type upload struct{}

func Upload() bundle.Mutator {
	return &upload{}
}

func (m *upload) Name() string {
	return "metadata.Upload"
}

func (m *upload) Apply(ctx context.Context, b *bundle.Bundle) error {
	f, err := filer.NewWorkspaceFilesClient(b.WorkspaceClient(), b.Config.Workspace.StatePath)
	if err != nil {
		return err
	}

	metadata, err := json.MarshalIndent(b.Metadata, "", " ")
	if err != nil {
		return err
	}

	return f.Write(ctx, MetadataFileName, bytes.NewReader(metadata), filer.CreateParentDirectories, filer.OverwriteIfExists)
}
```

New file — the metadata schema types:

```go
package metadata

import (
	"github.com/databricks/cli/bundle/config"
)

const Version = 1

type Bundle struct {
	Git config.Git `json:"git,omitempty"`
}

type Workspace struct {
	FilesPath string `json:"file_path,omitempty"`
}

type Job struct {
	ID string `json:"id,omitempty"`

	// Relative path from the bundle root to the configuration file that holds
	// the definition of this resource.
	RelativePath string `json:"relative_path,omitempty"`
}

type Resources struct {
	Jobs map[string]*Job `json:"jobs,omitempty"`
}

type Config struct {
	Bundle    Bundle    `json:"bundle,omitempty"`
	Workspace Workspace `json:"workspace,omitempty"`
	Resources Resources `json:"resources,omitempty"`
}

// Metadata about the bundle deployment. This is the interface Databricks services
// rely on to integrate with bundles when they need additional information about
// a bundle deployment.
//
// After deploy, a file containing the metadata (metadata.json) can be found
// in the WSFS location containing the bundle state.
type Metadata struct {
	Version int    `json:"version"`
	Config  Config `json:"config"`
}
```
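
As a quick sanity check of the schema above, marshaling a minimal `Metadata` value reproduces the `version`/`config` envelope from the examples in the PR description; a sketch with made-up field values:

```go
package main

import (
	"encoding/json"
	"fmt"

	"github.com/databricks/cli/bundle/config"
	"github.com/databricks/cli/bundle/metadata"
)

func main() {
	m := metadata.Metadata{
		Version: metadata.Version,
		Config: metadata.Config{
			Bundle: metadata.Bundle{
				Git: config.Git{Branch: "master", BundleRootPath: "."},
			},
			Workspace: metadata.Workspace{
				FilesPath: "/Users/someone@example.com/.bundle/x/default/files",
			},
		},
	}
	out, _ := json.MarshalIndent(m, "", "  ")
	// Note: omitempty has no effect on struct-typed fields, so "resources"
	// appears as an empty object here, just like the empty "lock" and "sync"
	// objects in the examples above.
	fmt.Println(string(out))
}
```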

```diff
@@ -7,6 +7,7 @@ import (
 	"github.com/databricks/cli/bundle/config/mutator"
 	"github.com/databricks/cli/bundle/deploy/files"
 	"github.com/databricks/cli/bundle/deploy/lock"
+	"github.com/databricks/cli/bundle/deploy/metadata"
 	"github.com/databricks/cli/bundle/deploy/terraform"
 	"github.com/databricks/cli/bundle/libraries"
 	"github.com/databricks/cli/bundle/python"
@@ -31,7 +32,12 @@ func Deploy() bundle.Mutator {
 			terraform.StatePull(),
 			bundle.Defer(
 				terraform.Apply(),
-				terraform.StatePush(),
+				bundle.Seq(
+					terraform.StatePush(),
+					terraform.Load(),
+					metadata.Compute(),
+					metadata.Upload(),
+				),
 			),
 		),
 		lock.Release(lock.GoalDeploy),
```

```diff
@@ -2,7 +2,7 @@
 	"properties": {
 		"unique_id": {
 			"type": "string",
-			"description": "Unique ID for job name"
+			"description": "Unique ID for pipeline name"
 		}
 	}
 }
```

New file — template schema for the `job_metadata` test fixture:

```json
{
  "properties": {
    "unique_id": {
      "type": "string",
      "description": "Unique ID for job name"
    },
    "spark_version": {
      "type": "string",
      "description": "Spark version used for job cluster"
    },
    "node_type_id": {
      "type": "string",
      "description": "Node type id for job cluster"
    }
  }
}
```

New file — test notebook:

```python
# Databricks notebook source
print("bye")
```

New file — job definition picked up via the `a/b/*.yml` include (the integration test expects it at `a/b/resources.yml`):

```yaml
resources:
  jobs:
    bar:
      name: test-job-metadata-2-{{.unique_id}}
      tasks:
        - task_key: my_notebook_task
          new_cluster:
            num_workers: 1
            spark_version: "{{.spark_version}}"
            node_type_id: "{{.node_type_id}}"
          notebook_task:
            notebook_path: "./bar.py"
```

New file — `databricks.yml` at the bundle root:

```yaml
bundle:
  name: job-metadata

workspace:
  root_path: "~/.bundle/{{.unique_id}}"

include:
  - "a/b/*.yml"

resources:
  jobs:
    foo:
      name: test-job-metadata-1-{{.unique_id}}
      tasks:
        - task_key: my_notebook_task
          new_cluster:
            num_workers: 1
            spark_version: "{{.spark_version}}"
            node_type_id: "{{.node_type_id}}"
          notebook_task:
            notebook_path: "./foo.py"
```

New file — test notebook:

```python
# Databricks notebook source
print("hello")
```

New file — integration test that deploys the bundle and verifies the uploaded `metadata.json`:

```go
package bundle

import (
	"context"
	"encoding/json"
	"fmt"
	"io"
	"path"
	"strconv"
	"testing"

	"github.com/databricks/cli/bundle/config"
	"github.com/databricks/cli/bundle/metadata"
	"github.com/databricks/cli/internal"
	"github.com/databricks/cli/libs/filer"
	"github.com/databricks/databricks-sdk-go"
	"github.com/google/uuid"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

func TestAccJobsMetadataFile(t *testing.T) {
	env := internal.GetEnvOrSkipTest(t, "CLOUD_ENV")
	t.Log(env)

	w, err := databricks.NewWorkspaceClient()
	require.NoError(t, err)

	nodeTypeId := internal.GetNodeTypeId(env)
	uniqueId := uuid.New().String()
	bundleRoot, err := initTestTemplate(t, "job_metadata", map[string]any{
		"unique_id":     uniqueId,
		"node_type_id":  nodeTypeId,
		"spark_version": "13.2.x-snapshot-scala2.12",
	})
	require.NoError(t, err)

	// deploy bundle
	err = deployBundle(t, bundleRoot)
	require.NoError(t, err)

	// Cleanup the deployed bundle
	t.Cleanup(func() {
		err = destroyBundle(t, bundleRoot)
		require.NoError(t, err)
	})

	// assert job 1 is created
	jobName := "test-job-metadata-1-" + uniqueId
	job1, err := w.Jobs.GetBySettingsName(context.Background(), jobName)
	require.NoError(t, err)
	assert.Equal(t, job1.Settings.Name, jobName)

	// assert job 2 is created
	jobName = "test-job-metadata-2-" + uniqueId
	job2, err := w.Jobs.GetBySettingsName(context.Background(), jobName)
	require.NoError(t, err)
	assert.Equal(t, job2.Settings.Name, jobName)

	// Compute root path for the bundle deployment
	me, err := w.CurrentUser.Me(context.Background())
	require.NoError(t, err)
	root := fmt.Sprintf("/Users/%s/.bundle/%s", me.UserName, uniqueId)
	f, err := filer.NewWorkspaceFilesClient(w, root)
	require.NoError(t, err)

	// Read metadata object from the workspace
	r, err := f.Read(context.Background(), "state/metadata.json")
	require.NoError(t, err)
	b, err := io.ReadAll(r)
	require.NoError(t, err)
	actualMetadata := metadata.Metadata{}
	err = json.Unmarshal(b, &actualMetadata)
	require.NoError(t, err)

	// expected value for the metadata
	expectedMetadata := metadata.Metadata{
		Version: metadata.Version,
		Config: metadata.Config{
			Bundle: metadata.Bundle{
				Git: config.Git{
					BundleRootPath: ".",
				},
			},
			Workspace: metadata.Workspace{
				FilesPath: path.Join(root, "files"),
			},
			Resources: metadata.Resources{
				Jobs: map[string]*metadata.Job{
					"foo": {
						ID:           strconv.FormatInt(job1.JobId, 10),
						RelativePath: "databricks.yml",
					},
					"bar": {
						ID:           strconv.FormatInt(job2.JobId, 10),
						RelativePath: "a/b/resources.yml",
					},
				},
			},
		},
	}

	// Assert metadata matches what we expected.
	assert.Equal(t, expectedMetadata, actualMetadata)
}
```

```diff
@@ -40,7 +40,7 @@ type Repository struct {
 	config *config
 }

-// Root returns the repository root.
+// Root returns the absolute path to the repository root.
 func (r *Repository) Root() string {
 	return r.rootPath
 }
```
}