From 8568d92b3d07c3964eb3551307c6dd890b652030 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Mon, 30 Dec 2024 16:53:22 +0530 Subject: [PATCH] Add fast validate method --- bundle/config/validate/fast_validate.go | 51 +++++++++++++ bundle/config/validate/validate.go | 10 +-- .../config/validate/validate_artifact_path.go | 1 - .../validate/validate_artifact_path_test.go | 52 ++++++++++++- bundle/libraries/filer_volume.go | 74 +------------------ cmd/bundle/deploy.go | 2 + 6 files changed, 112 insertions(+), 78 deletions(-) create mode 100644 bundle/config/validate/fast_validate.go diff --git a/bundle/config/validate/fast_validate.go b/bundle/config/validate/fast_validate.go new file mode 100644 index 000000000..e5cc32b0c --- /dev/null +++ b/bundle/config/validate/fast_validate.go @@ -0,0 +1,51 @@ +package validate + +import ( + "context" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/libs/diag" +) + +// FastValidate runs a set of fast validation checks. This is a subset of the full +// suite of validation mutators that satisfy ANY ONE of the following criteria: +// +// 1. No file i/o or network requests are made in the mutator. +// 2. Only returns errors which are blocking for a bundle deployment. +// +// The full suite of validation mutators is available in the [Validate] mutator. +type fastValidateReadonly struct{} + +func FastValidateReadonly() bundle.ReadOnlyMutator { + return &fastValidateReadonly{} +} + +func (f *fastValidateReadonly) Name() string { + return "fast_validate(readonly)" +} + +func (f *fastValidateReadonly) Apply(ctx context.Context, rb bundle.ReadOnlyBundle) diag.Diagnostics { + return bundle.ApplyReadOnly(ctx, rb, bundle.Parallel( + // Fast mutators with only in-memory checks + JobClusterKeyDefined(), + JobTaskClusterSpec(), + SingleNodeCluster(), + + // Blocking mutators. Deployments will fail if these checks fail. + ValidateArtifactPath(), + )) +} + +type fastValidate struct{} + +func FastValidate() bundle.Mutator { + return &fastValidate{} +} + +func (f *fastValidate) Name() string { + return "fast_validate" +} + +func (f *fastValidate) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + return bundle.ApplyReadOnly(ctx, bundle.ReadOnly(b), FastValidateReadonly()) +} diff --git a/bundle/config/validate/validate.go b/bundle/config/validate/validate.go index 45661bd80..8fdd704ab 100644 --- a/bundle/config/validate/validate.go +++ b/bundle/config/validate/validate.go @@ -30,13 +30,13 @@ func (l location) Path() dyn.Path { // Apply implements bundle.Mutator. func (v *validate) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { return bundle.ApplyReadOnly(ctx, bundle.ReadOnly(b), bundle.Parallel( - JobClusterKeyDefined(), + FastValidateReadonly(), + + // Slow mutators that require network or file i/o. These are only + // run in the `bundle validate` command. FilesToSync(), - ValidateSyncPatterns(), - JobTaskClusterSpec(), ValidateFolderPermissions(), - SingleNodeCluster(), - ValidateArtifactPath(), + ValidateSyncPatterns(), )) } diff --git a/bundle/config/validate/validate_artifact_path.go b/bundle/config/validate/validate_artifact_path.go index c8b65983f..632bce1ce 100644 --- a/bundle/config/validate/validate_artifact_path.go +++ b/bundle/config/validate/validate_artifact_path.go @@ -26,7 +26,6 @@ func (v *validateArtifactPath) Name() string { func (v *validateArtifactPath) Apply(ctx context.Context, rb bundle.ReadOnlyBundle) diag.Diagnostics { // We only validate UC Volumes paths right now. - // TODO? if !libraries.IsVolumesPath(rb.Config().Workspace.ArtifactPath) { return nil } diff --git a/bundle/config/validate/validate_artifact_path_test.go b/bundle/config/validate/validate_artifact_path_test.go index c91da3aae..26e58bb9e 100644 --- a/bundle/config/validate/validate_artifact_path_test.go +++ b/bundle/config/validate/validate_artifact_path_test.go @@ -6,6 +6,7 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/resources" "github.com/databricks/cli/bundle/config/validate" "github.com/databricks/cli/bundle/internal/bundletest" "github.com/databricks/cli/libs/diag" @@ -17,7 +18,56 @@ import ( "github.com/stretchr/testify/mock" ) -func TestVali +func TestValidateArtifactPathWithVolumeInBundle(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Workspace: config.Workspace{ + ArtifactPath: "/Volumes/catalogN/schemaN/volumeN/abc", + }, + Resources: config.Resources{ + Volumes: map[string]*resources.Volume{ + "foo": { + CreateVolumeRequestContent: &catalog.CreateVolumeRequestContent{ + CatalogName: "catalogN", + Name: "volumeN", + VolumeType: "MANAGED", + SchemaName: "schemaN", + }, + }, + }, + }, + }, + } + + bundletest.SetLocation(b, "workspace.artifact_path", []dyn.Location{{File: "file", Line: 1, Column: 1}}) + bundletest.SetLocation(b, "resources.volumes.foo", []dyn.Location{{File: "file", Line: 2, Column: 2}}) + + ctx := context.Background() + m := mocks.NewMockWorkspaceClient(t) + api := m.GetMockGrantsAPI() + api.EXPECT().GetEffectiveBySecurableTypeAndFullName(mock.Anything, catalog.SecurableTypeVolume, "catalogN.schemaN.volumeN").Return(nil, &apierr.APIError{ + StatusCode: 404, + }) + b.SetWorkpaceClient(m.WorkspaceClient) + + diags := bundle.ApplyReadOnly(ctx, bundle.ReadOnly(b), validate.ValidateArtifactPath()) + assert.Equal(t, diag.Diagnostics{{ + Severity: diag.Error, + Summary: "volume catalogN.schemaN.volumeN does not exist", + Locations: []dyn.Location{ + {File: "file", Line: 1, Column: 1}, + {File: "file", Line: 2, Column: 2}, + }, + Paths: []dyn.Path{ + dyn.MustPathFromString("workspace.artifact_path"), + dyn.MustPathFromString("resources.volumes.foo"), + }, + Detail: `You are using a volume in your artifact_path that is managed by +this bundle but which has not been deployed yet. Please first deploy +the volume using 'bundle deploy' and then switch over to using it in +the artifact_path.`, + }}, diags) +} func TestValidateArtifactPath(t *testing.T) { b := &bundle.Bundle{ diff --git a/bundle/libraries/filer_volume.go b/bundle/libraries/filer_volume.go index 94ff74a4c..b1ba4b769 100644 --- a/bundle/libraries/filer_volume.go +++ b/bundle/libraries/filer_volume.go @@ -2,7 +2,6 @@ package libraries import ( "context" - "errors" "fmt" "path" "strings" @@ -13,7 +12,6 @@ import ( "github.com/databricks/cli/libs/dyn" "github.com/databricks/cli/libs/dyn/dynvar" "github.com/databricks/cli/libs/filer" - "github.com/databricks/databricks-sdk-go/apierr" ) func ExtractVolumeFromPath(artifactPath string) (string, string, string, error) { @@ -41,77 +39,11 @@ func ExtractVolumeFromPath(artifactPath string) (string, string, string, error) return catalogName, schemaName, volumeName, nil } -// This function returns a filer for ".internal" folder inside the directory configured -// at `workspace.artifact_path`. -// This function also checks if the UC volume exists in the workspace and then: -// 1. If the UC volume exists in the workspace: -// Returns a filer for the UC volume. -// 2. If the UC volume does not exist in the workspace but is (with high confidence) defined in -// the bundle configuration: -// Returns an error and a warning that instructs the user to deploy the -// UC volume before using it in the artifact path. -// 3. If the UC volume does not exist in the workspace and is not defined in the bundle configuration: -// Returns an error. func filerForVolume(ctx context.Context, b *bundle.Bundle) (filer.Filer, string, diag.Diagnostics) { - artifactPath := b.Config.Workspace.ArtifactPath w := b.WorkspaceClient() - - catalogName, schemaName, volumeName, err := ExtractVolumeFromPath(artifactPath) - if err != nil { - return nil, "", diag.Diagnostics{ - { - Severity: diag.Error, - Summary: err.Error(), - Locations: b.Config.GetLocations("workspace.artifact_path"), - Paths: []dyn.Path{dyn.MustPathFromString("workspace.artifact_path")}, - }, - } - } - - // Check if the UC volume exists in the workspace. - volumeFullName := fmt.Sprintf("%s.%s.%s", catalogName, schemaName, volumeName) - _, err = w.Volumes.ReadByName(ctx, volumeName) - - // If the volume exists already, directly return the filer for the path to - // upload the artifacts to. - if err == nil { - uploadPath := path.Join(artifactPath, InternalDirName) - f, err := filer.NewFilesClient(w, uploadPath) - return f, uploadPath, diag.FromErr(err) - } - - baseErr := diag.Diagnostic{ - Severity: diag.Error, - Summary: fmt.Sprintf("unable to determine if volume at %s exists: %s", volumeFullName, err), - Locations: b.Config.GetLocations("workspace.artifact_path"), - Paths: []dyn.Path{dyn.MustPathFromString("workspace.artifact_path")}, - } - - if errors.Is(err, apierr.ErrPermissionDenied) { - // If the API returned a 403, the user does not have permission to access the volume. - // Modify the error message to provide more context. - baseErr.Summary = fmt.Sprintf("cannot access volume %s: %s", volumeFullName, err) - } - if errors.Is(err, apierr.ErrNotFound) { - // Since the API returned a 404, the volume does not exist. - // Modify the error message to provide more context. - baseErr.Summary = fmt.Sprintf("volume %s does not exist", volumeFullName) - - // If the volume is defined in the bundle, provide a more helpful error diagnostic, - // with more details and location information. - path, locations, ok := FindVolumeInBundle(b.Config, catalogName, schemaName, volumeName) - if !ok { - return nil, "", diag.Diagnostics{baseErr} - } - baseErr.Detail = `You are using a volume in your artifact_path that is managed by -this bundle but which has not been deployed yet. Please first deploy -the volume using 'bundle deploy' and then switch over to using it in -the artifact_path.` - baseErr.Paths = append(baseErr.Paths, path) - baseErr.Locations = append(baseErr.Locations, locations...) - } - - return nil, "", diag.Diagnostics{baseErr} + uploadPath := path.Join(b.Config.Workspace.ArtifactPath, InternalDirName) + f, err := filer.NewFilesClient(w, uploadPath) + return f, uploadPath, diag.FromErr(err) } func FindVolumeInBundle(r config.Root, catalogName, schemaName, volumeName string) (dyn.Path, []dyn.Location, bool) { diff --git a/cmd/bundle/deploy.go b/cmd/bundle/deploy.go index a25e02f6c..560b07e39 100644 --- a/cmd/bundle/deploy.go +++ b/cmd/bundle/deploy.go @@ -5,6 +5,7 @@ import ( "fmt" "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config/validate" "github.com/databricks/cli/bundle/phases" "github.com/databricks/cli/bundle/render" "github.com/databricks/cli/cmd/bundle/utils" @@ -71,6 +72,7 @@ func newDeployCommand() *cobra.Command { diags = diags.Extend( bundle.Apply(ctx, b, bundle.Seq( phases.Initialize(), + validate.FastValidate(), phases.Build(), phases.Deploy(outputHandler), )),