2022-11-18 09:57:31 +00:00
|
|
|
package config
|
|
|
|
|
2022-12-15 12:00:41 +00:00
|
|
|
import (
|
2024-02-14 18:04:45 +00:00
|
|
|
"context"
|
2023-04-17 10:21:21 +00:00
|
|
|
"fmt"
|
|
|
|
|
2023-05-16 16:35:39 +00:00
|
|
|
"github.com/databricks/cli/bundle/config/resources"
|
2024-02-14 18:04:45 +00:00
|
|
|
"github.com/databricks/databricks-sdk-go"
|
2022-12-15 12:00:41 +00:00
|
|
|
)
|
2022-11-18 09:57:31 +00:00
|
|
|
|
|
|
|
// Resources defines Databricks resources associated with the bundle.
|
|
|
|
type Resources struct {
|
2022-12-15 12:00:41 +00:00
|
|
|
Jobs map[string]*resources.Job `json:"jobs,omitempty"`
|
|
|
|
Pipelines map[string]*resources.Pipeline `json:"pipelines,omitempty"`
|
2023-03-20 20:28:43 +00:00
|
|
|
|
2023-09-07 21:54:31 +00:00
|
|
|
Models map[string]*resources.MlflowModel `json:"models,omitempty"`
|
|
|
|
Experiments map[string]*resources.MlflowExperiment `json:"experiments,omitempty"`
|
|
|
|
ModelServingEndpoints map[string]*resources.ModelServingEndpoint `json:"model_serving_endpoints,omitempty"`
|
2023-10-16 15:32:49 +00:00
|
|
|
RegisteredModels map[string]*resources.RegisteredModel `json:"registered_models,omitempty"`
|
2022-11-18 09:57:31 +00:00
|
|
|
}
|
2023-04-12 14:17:13 +00:00
|
|
|
|
2023-04-17 10:21:21 +00:00
|
|
|
// UniqueResourceIdTracker records, for every resource name seen so far, which
// kind of resource claimed the name and where that resource was configured.
// Both maps are keyed by the resource name.
type UniqueResourceIdTracker struct {
	// Type maps a resource name to a label for its resource type,
	// for example "job" or "pipeline".
	Type map[string]string

	// ConfigPath maps a resource name to the configuration file path
	// recorded on the resource.
	ConfigPath map[string]string
}
|
|
|
|
|
|
|
|
// verifies merging is safe by checking no duplicate identifiers exist
|
|
|
|
func (r *Resources) VerifySafeMerge(other *Resources) error {
|
|
|
|
rootTracker, err := r.VerifyUniqueResourceIdentifiers()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
otherTracker, err := other.VerifyUniqueResourceIdentifiers()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
for k := range otherTracker.Type {
|
|
|
|
if _, ok := rootTracker.Type[k]; ok {
|
|
|
|
return fmt.Errorf("multiple resources named %s (%s at %s, %s at %s)",
|
|
|
|
k,
|
|
|
|
rootTracker.Type[k],
|
|
|
|
rootTracker.ConfigPath[k],
|
|
|
|
otherTracker.Type[k],
|
|
|
|
otherTracker.ConfigPath[k],
|
|
|
|
)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// This function verifies there are no duplicate names used for the resource definations
|
|
|
|
func (r *Resources) VerifyUniqueResourceIdentifiers() (*UniqueResourceIdTracker, error) {
|
|
|
|
tracker := &UniqueResourceIdTracker{
|
|
|
|
Type: make(map[string]string),
|
|
|
|
ConfigPath: make(map[string]string),
|
|
|
|
}
|
|
|
|
for k := range r.Jobs {
|
|
|
|
tracker.Type[k] = "job"
|
|
|
|
tracker.ConfigPath[k] = r.Jobs[k].ConfigFilePath
|
|
|
|
}
|
|
|
|
for k := range r.Pipelines {
|
|
|
|
if _, ok := tracker.Type[k]; ok {
|
|
|
|
return tracker, fmt.Errorf("multiple resources named %s (%s at %s, %s at %s)",
|
|
|
|
k,
|
|
|
|
tracker.Type[k],
|
|
|
|
tracker.ConfigPath[k],
|
|
|
|
"pipeline",
|
|
|
|
r.Pipelines[k].ConfigFilePath,
|
|
|
|
)
|
|
|
|
}
|
|
|
|
tracker.Type[k] = "pipeline"
|
|
|
|
tracker.ConfigPath[k] = r.Pipelines[k].ConfigFilePath
|
|
|
|
}
|
|
|
|
for k := range r.Models {
|
|
|
|
if _, ok := tracker.Type[k]; ok {
|
|
|
|
return tracker, fmt.Errorf("multiple resources named %s (%s at %s, %s at %s)",
|
|
|
|
k,
|
|
|
|
tracker.Type[k],
|
|
|
|
tracker.ConfigPath[k],
|
|
|
|
"mlflow_model",
|
|
|
|
r.Models[k].ConfigFilePath,
|
|
|
|
)
|
|
|
|
}
|
|
|
|
tracker.Type[k] = "mlflow_model"
|
|
|
|
tracker.ConfigPath[k] = r.Models[k].ConfigFilePath
|
|
|
|
}
|
|
|
|
for k := range r.Experiments {
|
|
|
|
if _, ok := tracker.Type[k]; ok {
|
|
|
|
return tracker, fmt.Errorf("multiple resources named %s (%s at %s, %s at %s)",
|
|
|
|
k,
|
|
|
|
tracker.Type[k],
|
|
|
|
tracker.ConfigPath[k],
|
|
|
|
"mlflow_experiment",
|
|
|
|
r.Experiments[k].ConfigFilePath,
|
|
|
|
)
|
|
|
|
}
|
|
|
|
tracker.Type[k] = "mlflow_experiment"
|
|
|
|
tracker.ConfigPath[k] = r.Experiments[k].ConfigFilePath
|
|
|
|
}
|
2023-09-07 21:54:31 +00:00
|
|
|
for k := range r.ModelServingEndpoints {
|
|
|
|
if _, ok := tracker.Type[k]; ok {
|
|
|
|
return tracker, fmt.Errorf("multiple resources named %s (%s at %s, %s at %s)",
|
|
|
|
k,
|
|
|
|
tracker.Type[k],
|
|
|
|
tracker.ConfigPath[k],
|
|
|
|
"model_serving_endpoint",
|
|
|
|
r.ModelServingEndpoints[k].ConfigFilePath,
|
|
|
|
)
|
|
|
|
}
|
|
|
|
tracker.Type[k] = "model_serving_endpoint"
|
|
|
|
tracker.ConfigPath[k] = r.ModelServingEndpoints[k].ConfigFilePath
|
|
|
|
}
|
2023-10-16 15:32:49 +00:00
|
|
|
for k := range r.RegisteredModels {
|
|
|
|
if _, ok := tracker.Type[k]; ok {
|
|
|
|
return tracker, fmt.Errorf("multiple resources named %s (%s at %s, %s at %s)",
|
|
|
|
k,
|
|
|
|
tracker.Type[k],
|
|
|
|
tracker.ConfigPath[k],
|
|
|
|
"registered_model",
|
|
|
|
r.RegisteredModels[k].ConfigFilePath,
|
|
|
|
)
|
|
|
|
}
|
|
|
|
tracker.Type[k] = "registered_model"
|
|
|
|
tracker.ConfigPath[k] = r.RegisteredModels[k].ConfigFilePath
|
|
|
|
}
|
2023-04-17 10:21:21 +00:00
|
|
|
return tracker, nil
|
|
|
|
}
|
|
|
|
|
2023-04-12 14:17:13 +00:00
|
|
|
// SetConfigFilePath sets the specified path for all resources contained in this instance.
|
|
|
|
// This property is used to correctly resolve paths relative to the path
|
|
|
|
// of the configuration file they were defined in.
|
|
|
|
func (r *Resources) SetConfigFilePath(path string) {
|
|
|
|
for _, e := range r.Jobs {
|
|
|
|
e.ConfigFilePath = path
|
|
|
|
}
|
|
|
|
for _, e := range r.Pipelines {
|
|
|
|
e.ConfigFilePath = path
|
|
|
|
}
|
|
|
|
for _, e := range r.Models {
|
|
|
|
e.ConfigFilePath = path
|
|
|
|
}
|
|
|
|
for _, e := range r.Experiments {
|
|
|
|
e.ConfigFilePath = path
|
|
|
|
}
|
2023-09-07 21:54:31 +00:00
|
|
|
for _, e := range r.ModelServingEndpoints {
|
|
|
|
e.ConfigFilePath = path
|
|
|
|
}
|
2023-10-16 15:32:49 +00:00
|
|
|
for _, e := range r.RegisteredModels {
|
|
|
|
e.ConfigFilePath = path
|
|
|
|
}
|
2023-04-12 14:17:13 +00:00
|
|
|
}
|
2023-08-14 06:43:45 +00:00
|
|
|
|
2023-09-21 19:21:20 +00:00
|
|
|
// Merge iterates over all resources and merges chunks of the
|
|
|
|
// resource configuration that can be merged. For example, for
|
|
|
|
// jobs, this merges job cluster definitions and tasks that
|
|
|
|
// use the same `job_cluster_key`, or `task_key`, respectively.
|
|
|
|
func (r *Resources) Merge() error {
|
2023-08-14 06:43:45 +00:00
|
|
|
for _, job := range r.Jobs {
|
|
|
|
if err := job.MergeJobClusters(); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2023-09-18 14:13:50 +00:00
|
|
|
if err := job.MergeTasks(); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
2023-09-21 19:21:20 +00:00
|
|
|
for _, pipeline := range r.Pipelines {
|
|
|
|
if err := pipeline.MergeClusters(); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
2023-09-18 14:13:50 +00:00
|
|
|
return nil
|
|
|
|
}
|
2024-02-14 18:04:45 +00:00
|
|
|
|
|
|
|
type ConfigResource interface {
|
|
|
|
Exists(ctx context.Context, w *databricks.WorkspaceClient, id string) (bool, error)
|
|
|
|
TerraformResourceName() string
|
|
|
|
}
|
|
|
|
|
|
|
|
func (r *Resources) FindResourceByConfigKey(key string) (ConfigResource, error) {
|
|
|
|
found := make([]ConfigResource, 0)
|
|
|
|
for k := range r.Jobs {
|
|
|
|
if k == key {
|
|
|
|
found = append(found, r.Jobs[k])
|
|
|
|
}
|
|
|
|
}
|
|
|
|
for k := range r.Pipelines {
|
|
|
|
if k == key {
|
|
|
|
found = append(found, r.Pipelines[k])
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(found) == 0 {
|
|
|
|
return nil, fmt.Errorf("no such resource: %s", key)
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(found) > 1 {
|
|
|
|
keys := make([]string, 0, len(found))
|
|
|
|
for _, r := range found {
|
|
|
|
keys = append(keys, fmt.Sprintf("%s:%s", r.TerraformResourceName(), key))
|
|
|
|
}
|
|
|
|
return nil, fmt.Errorf("ambiguous: %s (can resolve to all of %s)", key, keys)
|
|
|
|
}
|
|
|
|
|
|
|
|
return found[0], nil
|
|
|
|
}
|