From a014d50a6af94c911ee46e57a4e2ffd3c13e8e53 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Fri, 17 May 2024 12:10:17 +0200 Subject: [PATCH] Fixed panic when loading incorrectly defined jobs (#1402) ## Changes If only key was defined for a job in YAML config, validate previously failed with segfault. This PR validates that jobs are correctly defined and returns an error if not. ## Tests Added regression test --- .../config/mutator/default_queueing_test.go | 12 +++++- bundle/config/resources.go | 42 +++++++++++++++++++ bundle/config/resources/job.go | 9 ++++ bundle/config/resources/mlflow_experiment.go | 28 +++++++++++++ bundle/config/resources/mlflow_model.go | 28 +++++++++++++ .../resources/model_serving_endpoint.go | 28 +++++++++++++ bundle/config/resources/pipeline.go | 9 ++++ bundle/config/resources/registered_model.go | 28 +++++++++++++ bundle/config/root.go | 8 ++++ bundle/permissions/filter_test.go | 7 ++++ bundle/permissions/mutator_test.go | 19 ++++++++- bundle/permissions/workspace_root_test.go | 4 +- .../my_first_job/resource.yml | 1 + .../my_second_job/resource.yml | 1 + bundle/tests/include_with_glob/job.yml | 1 + bundle/tests/undefined_job/databricks.yml | 8 ++++ bundle/tests/undefined_job_test.go | 12 ++++++ 17 files changed, 239 insertions(+), 6 deletions(-) create mode 100644 bundle/tests/undefined_job/databricks.yml create mode 100644 bundle/tests/undefined_job_test.go diff --git a/bundle/config/mutator/default_queueing_test.go b/bundle/config/mutator/default_queueing_test.go index ea60daf7..d3621663 100644 --- a/bundle/config/mutator/default_queueing_test.go +++ b/bundle/config/mutator/default_queueing_test.go @@ -56,7 +56,11 @@ func TestDefaultQueueingApplyEnableQueueing(t *testing.T) { Config: config.Root{ Resources: config.Resources{ Jobs: map[string]*resources.Job{ - "job": {}, + "job": { + JobSettings: &jobs.JobSettings{ + Name: "job", + }, + }, }, }, }, @@ -77,7 +81,11 @@ func TestDefaultQueueingApplyWithMultipleJobs(t *testing.T) { Queue: &jobs.QueueSettings{Enabled: false}, }, }, - "job2": {}, + "job2": { + JobSettings: &jobs.JobSettings{ + Name: "job", + }, + }, "job3": { JobSettings: &jobs.JobSettings{ Queue: &jobs.QueueSettings{Enabled: true}, diff --git a/bundle/config/resources.go b/bundle/config/resources.go index 457360a0..41ffc25c 100644 --- a/bundle/config/resources.go +++ b/bundle/config/resources.go @@ -126,6 +126,47 @@ func (r *Resources) VerifyUniqueResourceIdentifiers() (*UniqueResourceIdTracker, return tracker, nil } +type resource struct { + resource ConfigResource + resource_type string + key string +} + +func (r *Resources) allResources() []resource { + all := make([]resource, 0) + for k, e := range r.Jobs { + all = append(all, resource{resource_type: "job", resource: e, key: k}) + } + for k, e := range r.Pipelines { + all = append(all, resource{resource_type: "pipeline", resource: e, key: k}) + } + for k, e := range r.Models { + all = append(all, resource{resource_type: "model", resource: e, key: k}) + } + for k, e := range r.Experiments { + all = append(all, resource{resource_type: "experiment", resource: e, key: k}) + } + for k, e := range r.ModelServingEndpoints { + all = append(all, resource{resource_type: "serving endpoint", resource: e, key: k}) + } + for k, e := range r.RegisteredModels { + all = append(all, resource{resource_type: "registered model", resource: e, key: k}) + } + return all +} + +func (r *Resources) VerifyAllResourcesDefined() error { + all := r.allResources() + for _, e := range all { + err := e.resource.Validate() + if err != nil { + return fmt.Errorf("%s %s is not defined", e.resource_type, e.key) + } + } + + return nil +} + // ConfigureConfigFilePath sets the specified path for all resources contained in this instance. // This property is used to correctly resolve paths relative to the path // of the configuration file they were defined in. @@ -153,6 +194,7 @@ func (r *Resources) ConfigureConfigFilePath() { type ConfigResource interface { Exists(ctx context.Context, w *databricks.WorkspaceClient, id string) (bool, error) TerraformResourceName() string + Validate() error } func (r *Resources) FindResourceByConfigKey(key string) (ConfigResource, error) { diff --git a/bundle/config/resources/job.go b/bundle/config/resources/job.go index 45e9662d..dde5d566 100644 --- a/bundle/config/resources/job.go +++ b/bundle/config/resources/job.go @@ -2,6 +2,7 @@ package resources import ( "context" + "fmt" "strconv" "github.com/databricks/cli/bundle/config/paths" @@ -47,3 +48,11 @@ func (j *Job) Exists(ctx context.Context, w *databricks.WorkspaceClient, id stri func (j *Job) TerraformResourceName() string { return "databricks_job" } + +func (j *Job) Validate() error { + if j == nil || !j.DynamicValue.IsValid() || j.JobSettings == nil { + return fmt.Errorf("job is not defined") + } + + return nil +} diff --git a/bundle/config/resources/mlflow_experiment.go b/bundle/config/resources/mlflow_experiment.go index 0f53096a..7854ee7e 100644 --- a/bundle/config/resources/mlflow_experiment.go +++ b/bundle/config/resources/mlflow_experiment.go @@ -1,7 +1,12 @@ package resources import ( + "context" + "fmt" + "github.com/databricks/cli/bundle/config/paths" + "github.com/databricks/cli/libs/log" + "github.com/databricks/databricks-sdk-go" "github.com/databricks/databricks-sdk-go/marshal" "github.com/databricks/databricks-sdk-go/service/ml" ) @@ -23,3 +28,26 @@ func (s *MlflowExperiment) UnmarshalJSON(b []byte) error { func (s MlflowExperiment) MarshalJSON() ([]byte, error) { return marshal.Marshal(s) } + +func (s *MlflowExperiment) Exists(ctx context.Context, w *databricks.WorkspaceClient, id string) (bool, error) { + _, err := w.Experiments.GetExperiment(ctx, ml.GetExperimentRequest{ + ExperimentId: id, + }) + if err != nil { + log.Debugf(ctx, "experiment %s does not exist", id) + return false, err + } + return true, nil +} + +func (s *MlflowExperiment) TerraformResourceName() string { + return "databricks_mlflow_experiment" +} + +func (s *MlflowExperiment) Validate() error { + if s == nil || !s.DynamicValue.IsValid() { + return fmt.Errorf("experiment is not defined") + } + + return nil +} diff --git a/bundle/config/resources/mlflow_model.go b/bundle/config/resources/mlflow_model.go index 59893aa4..40da9f87 100644 --- a/bundle/config/resources/mlflow_model.go +++ b/bundle/config/resources/mlflow_model.go @@ -1,7 +1,12 @@ package resources import ( + "context" + "fmt" + "github.com/databricks/cli/bundle/config/paths" + "github.com/databricks/cli/libs/log" + "github.com/databricks/databricks-sdk-go" "github.com/databricks/databricks-sdk-go/marshal" "github.com/databricks/databricks-sdk-go/service/ml" ) @@ -23,3 +28,26 @@ func (s *MlflowModel) UnmarshalJSON(b []byte) error { func (s MlflowModel) MarshalJSON() ([]byte, error) { return marshal.Marshal(s) } + +func (s *MlflowModel) Exists(ctx context.Context, w *databricks.WorkspaceClient, id string) (bool, error) { + _, err := w.ModelRegistry.GetModel(ctx, ml.GetModelRequest{ + Name: id, + }) + if err != nil { + log.Debugf(ctx, "model %s does not exist", id) + return false, err + } + return true, nil +} + +func (s *MlflowModel) TerraformResourceName() string { + return "databricks_mlflow_model" +} + +func (s *MlflowModel) Validate() error { + if s == nil || !s.DynamicValue.IsValid() { + return fmt.Errorf("model is not defined") + } + + return nil +} diff --git a/bundle/config/resources/model_serving_endpoint.go b/bundle/config/resources/model_serving_endpoint.go index d1d57baf..503cfbbb 100644 --- a/bundle/config/resources/model_serving_endpoint.go +++ b/bundle/config/resources/model_serving_endpoint.go @@ -1,7 +1,12 @@ package resources import ( + "context" + "fmt" + "github.com/databricks/cli/bundle/config/paths" + "github.com/databricks/cli/libs/log" + "github.com/databricks/databricks-sdk-go" "github.com/databricks/databricks-sdk-go/marshal" "github.com/databricks/databricks-sdk-go/service/serving" ) @@ -33,3 +38,26 @@ func (s *ModelServingEndpoint) UnmarshalJSON(b []byte) error { func (s ModelServingEndpoint) MarshalJSON() ([]byte, error) { return marshal.Marshal(s) } + +func (s *ModelServingEndpoint) Exists(ctx context.Context, w *databricks.WorkspaceClient, id string) (bool, error) { + _, err := w.ServingEndpoints.Get(ctx, serving.GetServingEndpointRequest{ + Name: id, + }) + if err != nil { + log.Debugf(ctx, "serving endpoint %s does not exist", id) + return false, err + } + return true, nil +} + +func (s *ModelServingEndpoint) TerraformResourceName() string { + return "databricks_model_serving" +} + +func (s *ModelServingEndpoint) Validate() error { + if s == nil || !s.DynamicValue.IsValid() { + return fmt.Errorf("serving endpoint is not defined") + } + + return nil +} diff --git a/bundle/config/resources/pipeline.go b/bundle/config/resources/pipeline.go index 2f9ff8d0..7e914b90 100644 --- a/bundle/config/resources/pipeline.go +++ b/bundle/config/resources/pipeline.go @@ -2,6 +2,7 @@ package resources import ( "context" + "fmt" "github.com/databricks/cli/bundle/config/paths" "github.com/databricks/cli/libs/log" @@ -42,3 +43,11 @@ func (p *Pipeline) Exists(ctx context.Context, w *databricks.WorkspaceClient, id func (p *Pipeline) TerraformResourceName() string { return "databricks_pipeline" } + +func (p *Pipeline) Validate() error { + if p == nil || !p.DynamicValue.IsValid() { + return fmt.Errorf("pipeline is not defined") + } + + return nil +} diff --git a/bundle/config/resources/registered_model.go b/bundle/config/resources/registered_model.go index 7b4b70d1..fba643c6 100644 --- a/bundle/config/resources/registered_model.go +++ b/bundle/config/resources/registered_model.go @@ -1,7 +1,12 @@ package resources import ( + "context" + "fmt" + "github.com/databricks/cli/bundle/config/paths" + "github.com/databricks/cli/libs/log" + "github.com/databricks/databricks-sdk-go" "github.com/databricks/databricks-sdk-go/marshal" "github.com/databricks/databricks-sdk-go/service/catalog" ) @@ -34,3 +39,26 @@ func (s *RegisteredModel) UnmarshalJSON(b []byte) error { func (s RegisteredModel) MarshalJSON() ([]byte, error) { return marshal.Marshal(s) } + +func (s *RegisteredModel) Exists(ctx context.Context, w *databricks.WorkspaceClient, id string) (bool, error) { + _, err := w.RegisteredModels.Get(ctx, catalog.GetRegisteredModelRequest{ + FullName: id, + }) + if err != nil { + log.Debugf(ctx, "registered model %s does not exist", id) + return false, err + } + return true, nil +} + +func (s *RegisteredModel) TerraformResourceName() string { + return "databricks_registered_model" +} + +func (s *RegisteredModel) Validate() error { + if s == nil || !s.DynamicValue.IsValid() { + return fmt.Errorf("registered model is not defined") + } + + return nil +} diff --git a/bundle/config/root.go b/bundle/config/root.go index fda3759d..88197c2b 100644 --- a/bundle/config/root.go +++ b/bundle/config/root.go @@ -138,6 +138,14 @@ func (r *Root) updateWithDynamicValue(nv dyn.Value) error { // Assign the normalized configuration tree. r.value = nv + // At the moment the check has to be done as part of updateWithDynamicValue + // because otherwise ConfigureConfigFilePath will fail with a panic. + // In the future, we should move this check to a separate mutator in initialise phase. + err = r.Resources.VerifyAllResourcesDefined() + if err != nil { + return err + } + // Assign config file paths after converting to typed configuration. r.ConfigureConfigFilePath() return nil diff --git a/bundle/permissions/filter_test.go b/bundle/permissions/filter_test.go index 410fa4be..121ce10d 100644 --- a/bundle/permissions/filter_test.go +++ b/bundle/permissions/filter_test.go @@ -8,6 +8,7 @@ import ( "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/resources" "github.com/databricks/databricks-sdk-go/service/iam" + "github.com/databricks/databricks-sdk-go/service/jobs" "github.com/stretchr/testify/assert" ) @@ -45,9 +46,15 @@ func testFixture(userName string) *bundle.Bundle { Resources: config.Resources{ Jobs: map[string]*resources.Job{ "job1": { + JobSettings: &jobs.JobSettings{ + Name: "job1", + }, Permissions: p, }, "job2": { + JobSettings: &jobs.JobSettings{ + Name: "job2", + }, Permissions: p, }, }, diff --git a/bundle/permissions/mutator_test.go b/bundle/permissions/mutator_test.go index 438a1506..1a177d90 100644 --- a/bundle/permissions/mutator_test.go +++ b/bundle/permissions/mutator_test.go @@ -7,6 +7,7 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/databricks-sdk-go/service/jobs" "github.com/stretchr/testify/require" ) @@ -23,8 +24,16 @@ func TestApplyBundlePermissions(t *testing.T) { }, Resources: config.Resources{ Jobs: map[string]*resources.Job{ - "job_1": {}, - "job_2": {}, + "job_1": { + JobSettings: &jobs.JobSettings{ + Name: "job_1", + }, + }, + "job_2": { + JobSettings: &jobs.JobSettings{ + Name: "job_2", + }, + }, }, Pipelines: map[string]*resources.Pipeline{ "pipeline_1": {}, @@ -109,11 +118,17 @@ func TestWarningOnOverlapPermission(t *testing.T) { Resources: config.Resources{ Jobs: map[string]*resources.Job{ "job_1": { + JobSettings: &jobs.JobSettings{ + Name: "job_1", + }, Permissions: []resources.Permission{ {Level: CAN_VIEW, UserName: "TestUser"}, }, }, "job_2": { + JobSettings: &jobs.JobSettings{ + Name: "job_2", + }, Permissions: []resources.Permission{ {Level: CAN_VIEW, UserName: "TestUser2"}, }, diff --git a/bundle/permissions/workspace_root_test.go b/bundle/permissions/workspace_root_test.go index 7dd97b62..5e23a1da 100644 --- a/bundle/permissions/workspace_root_test.go +++ b/bundle/permissions/workspace_root_test.go @@ -30,8 +30,8 @@ func TestApplyWorkspaceRootPermissions(t *testing.T) { }, Resources: config.Resources{ Jobs: map[string]*resources.Job{ - "job_1": {JobSettings: &jobs.JobSettings{}}, - "job_2": {JobSettings: &jobs.JobSettings{}}, + "job_1": {JobSettings: &jobs.JobSettings{Name: "job_1"}}, + "job_2": {JobSettings: &jobs.JobSettings{Name: "job_2"}}, }, Pipelines: map[string]*resources.Pipeline{ "pipeline_1": {PipelineSpec: &pipelines.PipelineSpec{}}, diff --git a/bundle/tests/include_multiple/my_first_job/resource.yml b/bundle/tests/include_multiple/my_first_job/resource.yml index c2be5a16..4bd7c716 100644 --- a/bundle/tests/include_multiple/my_first_job/resource.yml +++ b/bundle/tests/include_multiple/my_first_job/resource.yml @@ -2,3 +2,4 @@ resources: jobs: my_first_job: id: 1 + name: "My First Job" diff --git a/bundle/tests/include_multiple/my_second_job/resource.yml b/bundle/tests/include_multiple/my_second_job/resource.yml index 2c28c462..3a151405 100644 --- a/bundle/tests/include_multiple/my_second_job/resource.yml +++ b/bundle/tests/include_multiple/my_second_job/resource.yml @@ -2,3 +2,4 @@ resources: jobs: my_second_job: id: 2 + name: "My Second Job" diff --git a/bundle/tests/include_with_glob/job.yml b/bundle/tests/include_with_glob/job.yml index 3d609c52..a9857781 100644 --- a/bundle/tests/include_with_glob/job.yml +++ b/bundle/tests/include_with_glob/job.yml @@ -2,3 +2,4 @@ resources: jobs: my_job: id: 1 + name: "My Job" diff --git a/bundle/tests/undefined_job/databricks.yml b/bundle/tests/undefined_job/databricks.yml new file mode 100644 index 00000000..12c19f94 --- /dev/null +++ b/bundle/tests/undefined_job/databricks.yml @@ -0,0 +1,8 @@ +bundle: + name: undefined-job + +resources: + jobs: + undefined: + test: + name: "Test Job" diff --git a/bundle/tests/undefined_job_test.go b/bundle/tests/undefined_job_test.go new file mode 100644 index 00000000..ed502c47 --- /dev/null +++ b/bundle/tests/undefined_job_test.go @@ -0,0 +1,12 @@ +package config_tests + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestUndefinedJobLoadsWithError(t *testing.T) { + _, diags := loadTargetWithDiags("./undefined_job", "default") + assert.ErrorContains(t, diags.Error(), "job undefined is not defined") +}