diff --git a/bundle/bundle.go b/bundle/bundle.go index fd9c131f..a2d774bb 100644 --- a/bundle/bundle.go +++ b/bundle/bundle.go @@ -15,6 +15,7 @@ import ( "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/env" + "github.com/databricks/cli/bundle/metadata" "github.com/databricks/cli/folders" "github.com/databricks/cli/libs/git" "github.com/databricks/cli/libs/locker" @@ -31,6 +32,14 @@ const internalFolder = ".internal" type Bundle struct { Config config.Root + // Metadata about the bundle deployment. This is the interface Databricks services + // rely on to integrate with bundles when they need additional information about + // a bundle deployment. + // + // After deploy, a file containing the metadata (metadata.json) can be found + // in the WSFS location containing the bundle state. + Metadata metadata.Metadata + // Store a pointer to the workspace client. // It can be initialized on demand after loading the configuration. clientOnce sync.Once diff --git a/bundle/config/bundle.go b/bundle/config/bundle.go index d444f507..933e88bf 100644 --- a/bundle/config/bundle.go +++ b/bundle/config/bundle.go @@ -29,7 +29,7 @@ type Bundle struct { Lock Lock `json:"lock" bundle:"readonly"` // Force-override Git branch validation. - Force bool `json:"force" bundle:"readonly"` + Force bool `json:"force,omitempty" bundle:"readonly"` // Contains Git information like current commit, current branch and // origin url. Automatically loaded by reading .git directory if not specified diff --git a/bundle/config/git.go b/bundle/config/git.go index 760134a8..58a5d54d 100644 --- a/bundle/config/git.go +++ b/bundle/config/git.go @@ -5,6 +5,9 @@ type Git struct { OriginURL string `json:"origin_url,omitempty"` Commit string `json:"commit,omitempty" bundle:"readonly"` + // Path to bundle root relative to the git repository root. + BundleRootPath string `json:"bundle_root_path,omitempty" bundle:"readonly"` + // Inferred is set to true if the Git details were inferred and weren't set explicitly Inferred bool `json:"-" bundle:"readonly"` diff --git a/bundle/config/lock.go b/bundle/config/lock.go index 28d5a5ac..760099a9 100644 --- a/bundle/config/lock.go +++ b/bundle/config/lock.go @@ -4,11 +4,11 @@ type Lock struct { // Enabled toggles deployment lock. True by default. // Use a pointer value so that only explicitly configured values are set // and we don't merge configuration with zero-initialized values. - Enabled *bool `json:"enabled"` + Enabled *bool `json:"enabled,omitempty"` // Force acquisition of deployment lock even if it is currently held. // This may be necessary if a prior deployment failed to release the lock. - Force bool `json:"force"` + Force bool `json:"force,omitempty"` } func (lock Lock) IsEnabled() bool { diff --git a/bundle/config/mutator/load_git_details.go b/bundle/config/mutator/load_git_details.go index ab47677d..3a50d683 100644 --- a/bundle/config/mutator/load_git_details.go +++ b/bundle/config/mutator/load_git_details.go @@ -2,6 +2,7 @@ package mutator import ( "context" + "path/filepath" "github.com/databricks/cli/bundle" "github.com/databricks/cli/libs/git" @@ -52,5 +53,17 @@ func (m *loadGitDetails) Apply(ctx context.Context, b *bundle.Bundle) error { remoteUrl := repo.OriginUrl() b.Config.Bundle.Git.OriginURL = remoteUrl } + + // Compute relative path of the bundle root from the Git repo root. + absBundlePath, err := filepath.Abs(b.Config.Path) + if err != nil { + return err + } + // repo.Root() returns the absolute path of the repo + relBundlePath, err := filepath.Rel(repo.Root(), absBundlePath) + if err != nil { + return err + } + b.Config.Bundle.Git.BundleRootPath = filepath.ToSlash(relBundlePath) return nil } diff --git a/bundle/config/paths/paths.go b/bundle/config/paths/paths.go index c2cbcb7d..2c9ecb8c 100644 --- a/bundle/config/paths/paths.go +++ b/bundle/config/paths/paths.go @@ -6,8 +6,8 @@ import ( ) type Paths struct { - // ConfigFilePath holds the path to the configuration file that - // described the resource that this type is embedded in. + // Absolute path on the local file system to the configuration file that holds + // the definition of this resource. ConfigFilePath string `json:"-" bundle:"readonly"` } diff --git a/bundle/deploy/metadata/compute.go b/bundle/deploy/metadata/compute.go new file mode 100644 index 00000000..9a3ae0e3 --- /dev/null +++ b/bundle/deploy/metadata/compute.go @@ -0,0 +1,51 @@ +package metadata + +import ( + "context" + "fmt" + "path/filepath" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/metadata" +) + +type compute struct{} + +func Compute() bundle.Mutator { + return &compute{} +} + +func (m *compute) Name() string { + return "metadata.Compute" +} + +func (m *compute) Apply(_ context.Context, b *bundle.Bundle) error { + b.Metadata = metadata.Metadata{ + Version: metadata.Version, + Config: metadata.Config{}, + } + + // Set git details in metadata + b.Metadata.Config.Bundle.Git = b.Config.Bundle.Git + + // Set job config paths in metadata + jobsMetadata := make(map[string]*metadata.Job) + for name, job := range b.Config.Resources.Jobs { + // Compute config file path the job is defined in, relative to the bundle + // root + relativePath, err := filepath.Rel(b.Config.Path, job.ConfigFilePath) + if err != nil { + return fmt.Errorf("failed to compute relative path for job %s: %w", name, err) + } + // Metadata for the job + jobsMetadata[name] = &metadata.Job{ + ID: job.ID, + RelativePath: filepath.ToSlash(relativePath), + } + } + b.Metadata.Config.Resources.Jobs = jobsMetadata + + // Set file upload destination of the bundle in metadata + b.Metadata.Config.Workspace.FilesPath = b.Config.Workspace.FilesPath + return nil +} diff --git a/bundle/deploy/metadata/compute_test.go b/bundle/deploy/metadata/compute_test.go new file mode 100644 index 00000000..9e4b475c --- /dev/null +++ b/bundle/deploy/metadata/compute_test.go @@ -0,0 +1,100 @@ +package metadata + +import ( + "context" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/paths" + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/bundle/metadata" + "github.com/databricks/databricks-sdk-go/service/jobs" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestComputeMetadataMutator(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Workspace: config.Workspace{ + RootPath: "/Users/shreyas.goenka@databricks.com", + ArtifactsPath: "/Users/shreyas.goenka@databricks.com/artifacts", + FilesPath: "/Users/shreyas.goenka@databricks.com/files", + }, + Bundle: config.Bundle{ + Name: "my-bundle", + Target: "development", + Git: config.Git{ + Branch: "my-branch", + OriginURL: "www.host.com", + Commit: "abcd", + BundleRootPath: "a/b/c/d", + }, + }, + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "my-job-1": { + Paths: paths.Paths{ + ConfigFilePath: "a/b/c", + }, + ID: "1111", + JobSettings: &jobs.JobSettings{ + Name: "My Job One", + }, + }, + "my-job-2": { + Paths: paths.Paths{ + ConfigFilePath: "d/e/f", + }, + ID: "2222", + JobSettings: &jobs.JobSettings{ + Name: "My Job Two", + }, + }, + }, + Pipelines: map[string]*resources.Pipeline{ + "my-pipeline": { + Paths: paths.Paths{ + ConfigFilePath: "abc", + }, + }, + }, + }, + }, + } + + expectedMetadata := metadata.Metadata{ + Version: metadata.Version, + Config: metadata.Config{ + Workspace: metadata.Workspace{ + FilesPath: "/Users/shreyas.goenka@databricks.com/files", + }, + Bundle: metadata.Bundle{ + Git: config.Git{ + Branch: "my-branch", + OriginURL: "www.host.com", + Commit: "abcd", + BundleRootPath: "a/b/c/d", + }, + }, + Resources: metadata.Resources{ + Jobs: map[string]*metadata.Job{ + "my-job-1": { + RelativePath: "a/b/c", + ID: "1111", + }, + "my-job-2": { + RelativePath: "d/e/f", + ID: "2222", + }, + }, + }, + }, + } + + err := Compute().Apply(context.Background(), b) + require.NoError(t, err) + + assert.Equal(t, expectedMetadata, b.Metadata) +} diff --git a/bundle/deploy/metadata/upload.go b/bundle/deploy/metadata/upload.go new file mode 100644 index 00000000..f550a66e --- /dev/null +++ b/bundle/deploy/metadata/upload.go @@ -0,0 +1,36 @@ +package metadata + +import ( + "bytes" + "context" + "encoding/json" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/libs/filer" +) + +const MetadataFileName = "metadata.json" + +type upload struct{} + +func Upload() bundle.Mutator { + return &upload{} +} + +func (m *upload) Name() string { + return "metadata.Upload" +} + +func (m *upload) Apply(ctx context.Context, b *bundle.Bundle) error { + f, err := filer.NewWorkspaceFilesClient(b.WorkspaceClient(), b.Config.Workspace.StatePath) + if err != nil { + return err + } + + metadata, err := json.MarshalIndent(b.Metadata, "", " ") + if err != nil { + return err + } + + return f.Write(ctx, MetadataFileName, bytes.NewReader(metadata), filer.CreateParentDirectories, filer.OverwriteIfExists) +} diff --git a/bundle/metadata/metadata.go b/bundle/metadata/metadata.go new file mode 100644 index 00000000..27edd584 --- /dev/null +++ b/bundle/metadata/metadata.go @@ -0,0 +1,45 @@ +package metadata + +import ( + "github.com/databricks/cli/bundle/config" +) + +const Version = 1 + +type Bundle struct { + Git config.Git `json:"git,omitempty"` +} + +type Workspace struct { + FilesPath string `json:"file_path,omitempty"` +} + +type Job struct { + ID string `json:"id,omitempty"` + + // Relative path from the bundle root to the configuration file that holds + // the definition of this resource. + RelativePath string `json:"relative_path,omitempty"` +} + +type Resources struct { + Jobs map[string]*Job `json:"jobs,omitempty"` +} + +type Config struct { + Bundle Bundle `json:"bundle,omitempty"` + Workspace Workspace `json:"workspace,omitempty"` + Resources Resources `json:"resources,omitempty"` +} + +// Metadata about the bundle deployment. This is the interface Databricks services +// rely on to integrate with bundles when they need additional information about +// a bundle deployment. +// +// After deploy, a file containing the metadata (metadata.json) can be found +// in the WSFS location containing the bundle state. +type Metadata struct { + Version int `json:"version"` + + Config Config `json:"config"` +} diff --git a/bundle/phases/deploy.go b/bundle/phases/deploy.go index 6c75218b..805bae80 100644 --- a/bundle/phases/deploy.go +++ b/bundle/phases/deploy.go @@ -7,6 +7,7 @@ import ( "github.com/databricks/cli/bundle/config/mutator" "github.com/databricks/cli/bundle/deploy/files" "github.com/databricks/cli/bundle/deploy/lock" + "github.com/databricks/cli/bundle/deploy/metadata" "github.com/databricks/cli/bundle/deploy/terraform" "github.com/databricks/cli/bundle/libraries" "github.com/databricks/cli/bundle/python" @@ -31,7 +32,12 @@ func Deploy() bundle.Mutator { terraform.StatePull(), bundle.Defer( terraform.Apply(), - terraform.StatePush(), + bundle.Seq( + terraform.StatePush(), + terraform.Load(), + metadata.Compute(), + metadata.Upload(), + ), ), ), lock.Release(lock.GoalDeploy), diff --git a/internal/bundle/bundles/deploy_then_remove_resources/databricks_template_schema.json b/internal/bundle/bundles/deploy_then_remove_resources/databricks_template_schema.json index cfed842c..8fca7a7c 100644 --- a/internal/bundle/bundles/deploy_then_remove_resources/databricks_template_schema.json +++ b/internal/bundle/bundles/deploy_then_remove_resources/databricks_template_schema.json @@ -2,7 +2,7 @@ "properties": { "unique_id": { "type": "string", - "description": "Unique ID for job name" + "description": "Unique ID for pipeline name" } } } diff --git a/internal/bundle/bundles/job_metadata/databricks_template_schema.json b/internal/bundle/bundles/job_metadata/databricks_template_schema.json new file mode 100644 index 00000000..c1c5cf12 --- /dev/null +++ b/internal/bundle/bundles/job_metadata/databricks_template_schema.json @@ -0,0 +1,16 @@ +{ + "properties": { + "unique_id": { + "type": "string", + "description": "Unique ID for job name" + }, + "spark_version": { + "type": "string", + "description": "Spark version used for job cluster" + }, + "node_type_id": { + "type": "string", + "description": "Node type id for job cluster" + } + } +} diff --git a/internal/bundle/bundles/job_metadata/template/a/b/bar.py b/internal/bundle/bundles/job_metadata/template/a/b/bar.py new file mode 100644 index 00000000..6f463767 --- /dev/null +++ b/internal/bundle/bundles/job_metadata/template/a/b/bar.py @@ -0,0 +1,2 @@ +# Databricks notebook source +print("bye") diff --git a/internal/bundle/bundles/job_metadata/template/a/b/resources.yml.tmpl b/internal/bundle/bundles/job_metadata/template/a/b/resources.yml.tmpl new file mode 100644 index 00000000..bdba05f5 --- /dev/null +++ b/internal/bundle/bundles/job_metadata/template/a/b/resources.yml.tmpl @@ -0,0 +1,12 @@ +resources: + jobs: + bar: + name: test-job-metadata-2-{{.unique_id}} + tasks: + - task_key: my_notebook_task + new_cluster: + num_workers: 1 + spark_version: "{{.spark_version}}" + node_type_id: "{{.node_type_id}}" + notebook_task: + notebook_path: "./bar.py" diff --git a/internal/bundle/bundles/job_metadata/template/databricks.yml.tmpl b/internal/bundle/bundles/job_metadata/template/databricks.yml.tmpl new file mode 100644 index 00000000..7aaabadd --- /dev/null +++ b/internal/bundle/bundles/job_metadata/template/databricks.yml.tmpl @@ -0,0 +1,21 @@ +bundle: + name: job-metadata + +workspace: + root_path: "~/.bundle/{{.unique_id}}" + +include: + - "a/b/*.yml" + +resources: + jobs: + foo: + name: test-job-metadata-1-{{.unique_id}} + tasks: + - task_key: my_notebook_task + new_cluster: + num_workers: 1 + spark_version: "{{.spark_version}}" + node_type_id: "{{.node_type_id}}" + notebook_task: + notebook_path: "./foo.py" diff --git a/internal/bundle/bundles/job_metadata/template/foo.py b/internal/bundle/bundles/job_metadata/template/foo.py new file mode 100644 index 00000000..4914a743 --- /dev/null +++ b/internal/bundle/bundles/job_metadata/template/foo.py @@ -0,0 +1,2 @@ +# Databricks notebook source +print("hello") diff --git a/internal/bundle/job_metadata_test.go b/internal/bundle/job_metadata_test.go new file mode 100644 index 00000000..70962c4c --- /dev/null +++ b/internal/bundle/job_metadata_test.go @@ -0,0 +1,105 @@ +package bundle + +import ( + "context" + "encoding/json" + "fmt" + "io" + "path" + "strconv" + "testing" + + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/metadata" + "github.com/databricks/cli/internal" + "github.com/databricks/cli/libs/filer" + "github.com/databricks/databricks-sdk-go" + "github.com/google/uuid" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestAccJobsMetadataFile(t *testing.T) { + env := internal.GetEnvOrSkipTest(t, "CLOUD_ENV") + t.Log(env) + + w, err := databricks.NewWorkspaceClient() + require.NoError(t, err) + + nodeTypeId := internal.GetNodeTypeId(env) + uniqueId := uuid.New().String() + bundleRoot, err := initTestTemplate(t, "job_metadata", map[string]any{ + "unique_id": uniqueId, + "node_type_id": nodeTypeId, + "spark_version": "13.2.x-snapshot-scala2.12", + }) + require.NoError(t, err) + + // deploy bundle + err = deployBundle(t, bundleRoot) + require.NoError(t, err) + + // Cleanup the deployed bundle + t.Cleanup(func() { + err = destroyBundle(t, bundleRoot) + require.NoError(t, err) + }) + + // assert job 1 is created + jobName := "test-job-metadata-1-" + uniqueId + job1, err := w.Jobs.GetBySettingsName(context.Background(), jobName) + require.NoError(t, err) + assert.Equal(t, job1.Settings.Name, jobName) + + // assert job 2 is created + jobName = "test-job-metadata-2-" + uniqueId + job2, err := w.Jobs.GetBySettingsName(context.Background(), jobName) + require.NoError(t, err) + assert.Equal(t, job2.Settings.Name, jobName) + + // Compute root path for the bundle deployment + me, err := w.CurrentUser.Me(context.Background()) + require.NoError(t, err) + root := fmt.Sprintf("/Users/%s/.bundle/%s", me.UserName, uniqueId) + f, err := filer.NewWorkspaceFilesClient(w, root) + require.NoError(t, err) + + // Read metadata object from the workspace + r, err := f.Read(context.Background(), "state/metadata.json") + require.NoError(t, err) + b, err := io.ReadAll(r) + require.NoError(t, err) + actualMetadata := metadata.Metadata{} + err = json.Unmarshal(b, &actualMetadata) + require.NoError(t, err) + + // expected value for the metadata + expectedMetadata := metadata.Metadata{ + Version: metadata.Version, + Config: metadata.Config{ + Bundle: metadata.Bundle{ + Git: config.Git{ + BundleRootPath: ".", + }, + }, + Workspace: metadata.Workspace{ + FilesPath: path.Join(root, "files"), + }, + Resources: metadata.Resources{ + Jobs: map[string]*metadata.Job{ + "foo": { + ID: strconv.FormatInt(job1.JobId, 10), + RelativePath: "databricks.yml", + }, + "bar": { + ID: strconv.FormatInt(job2.JobId, 10), + RelativePath: "a/b/resources.yml", + }, + }, + }, + }, + } + + // Assert metadata matches what we expected. + assert.Equal(t, expectedMetadata, actualMetadata) +} diff --git a/libs/git/repository.go b/libs/git/repository.go index 9c847687..d1641118 100644 --- a/libs/git/repository.go +++ b/libs/git/repository.go @@ -40,7 +40,7 @@ type Repository struct { config *config } -// Root returns the repository root. +// Root returns the absolute path to the repository root. func (r *Repository) Root() string { return r.rootPath }