From 0c9c90208f00482d7bf725a0bb865e1f2a5f706f Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Fri, 18 Oct 2024 17:37:16 +0200 Subject: [PATCH 01/13] Added a warning when incorrect permissions used for `/Workspace/Shared` bundle root (#1821) ## Changes Added a warning when incorrect permissions used for `/Workspace/Shared` bundle root ## Tests Added unit test --------- Co-authored-by: Pieter Noordhuis --- bundle/permissions/validate.go | 56 +++++++++++++++++++ bundle/permissions/validate_test.go | 66 +++++++++++++++++++++++ bundle/permissions/workspace_root.go | 8 +-- bundle/permissions/workspace_root_test.go | 4 +- bundle/phases/initialize.go | 3 ++ 5 files changed, 131 insertions(+), 6 deletions(-) create mode 100644 bundle/permissions/validate.go create mode 100644 bundle/permissions/validate_test.go diff --git a/bundle/permissions/validate.go b/bundle/permissions/validate.go new file mode 100644 index 000000000..acd2e6062 --- /dev/null +++ b/bundle/permissions/validate.go @@ -0,0 +1,56 @@ +package permissions + +import ( + "context" + "fmt" + "strings" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/libs/diag" +) + +type validateSharedRootPermissions struct { +} + +func ValidateSharedRootPermissions() bundle.Mutator { + return &validateSharedRootPermissions{} +} + +func (*validateSharedRootPermissions) Name() string { + return "ValidateSharedRootPermissions" +} + +func (*validateSharedRootPermissions) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + if isWorkspaceSharedRoot(b.Config.Workspace.RootPath) { + return isUsersGroupPermissionSet(b) + } + + return nil +} + +func isWorkspaceSharedRoot(path string) bool { + return strings.HasPrefix(path, "/Workspace/Shared/") +} + +// isUsersGroupPermissionSet checks that top-level permissions set for bundle contain group_name: users with CAN_MANAGE permission. +func isUsersGroupPermissionSet(b *bundle.Bundle) diag.Diagnostics { + var diags diag.Diagnostics + + allUsers := false + for _, p := range b.Config.Permissions { + if p.GroupName == "users" && p.Level == CAN_MANAGE { + allUsers = true + break + } + } + + if !allUsers { + diags = diags.Append(diag.Diagnostic{ + Severity: diag.Warning, + Summary: fmt.Sprintf("the bundle root path %s is writable by all workspace users", b.Config.Workspace.RootPath), + Detail: "The bundle is configured to use /Workspace/Shared, which will give read/write access to all users. If this is intentional, add CAN_MANAGE for 'group_name: users' permission to your bundle configuration. 
If the deployment should be restricted, move it to a restricted folder such as /Workspace/Users/.", + }) + } + + return diags +} diff --git a/bundle/permissions/validate_test.go b/bundle/permissions/validate_test.go new file mode 100644 index 000000000..ff132b4e1 --- /dev/null +++ b/bundle/permissions/validate_test.go @@ -0,0 +1,66 @@ +package permissions + +import ( + "context" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/databricks-sdk-go/experimental/mocks" + "github.com/databricks/databricks-sdk-go/service/jobs" + "github.com/stretchr/testify/require" +) + +func TestValidateSharedRootPermissionsForShared(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Workspace: config.Workspace{ + RootPath: "/Workspace/Shared/foo/bar", + }, + Permissions: []resources.Permission{ + {Level: CAN_MANAGE, GroupName: "users"}, + }, + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "job_1": {JobSettings: &jobs.JobSettings{Name: "job_1"}}, + "job_2": {JobSettings: &jobs.JobSettings{Name: "job_2"}}, + }, + }, + }, + } + + m := mocks.NewMockWorkspaceClient(t) + b.SetWorkpaceClient(m.WorkspaceClient) + + diags := bundle.Apply(context.Background(), b, bundle.Seq(ValidateSharedRootPermissions())) + require.Empty(t, diags) +} + +func TestValidateSharedRootPermissionsForSharedError(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Workspace: config.Workspace{ + RootPath: "/Workspace/Shared/foo/bar", + }, + Permissions: []resources.Permission{ + {Level: CAN_MANAGE, UserName: "foo@bar.com"}, + }, + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "job_1": {JobSettings: &jobs.JobSettings{Name: "job_1"}}, + "job_2": {JobSettings: &jobs.JobSettings{Name: "job_2"}}, + }, + }, + }, + } + + m := mocks.NewMockWorkspaceClient(t) + b.SetWorkpaceClient(m.WorkspaceClient) + + diags := bundle.Apply(context.Background(), b, bundle.Seq(ValidateSharedRootPermissions())) + require.Len(t, diags, 1) + require.Equal(t, "the bundle root path /Workspace/Shared/foo/bar is writable by all workspace users", diags[0].Summary) + require.Equal(t, diag.Warning, diags[0].Severity) +} diff --git a/bundle/permissions/workspace_root.go b/bundle/permissions/workspace_root.go index a59a039f6..e7867521e 100644 --- a/bundle/permissions/workspace_root.go +++ b/bundle/permissions/workspace_root.go @@ -16,6 +16,10 @@ func ApplyWorkspaceRootPermissions() bundle.Mutator { return &workspaceRootPermissions{} } +func (*workspaceRootPermissions) Name() string { + return "ApplyWorkspaceRootPermissions" +} + // Apply implements bundle.Mutator. 
func (*workspaceRootPermissions) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { err := giveAccessForWorkspaceRoot(ctx, b) @@ -26,10 +30,6 @@ func (*workspaceRootPermissions) Apply(ctx context.Context, b *bundle.Bundle) di return nil } -func (*workspaceRootPermissions) Name() string { - return "ApplyWorkspaceRootPermissions" -} - func giveAccessForWorkspaceRoot(ctx context.Context, b *bundle.Bundle) error { permissions := make([]workspace.WorkspaceObjectAccessControlRequest, 0) diff --git a/bundle/permissions/workspace_root_test.go b/bundle/permissions/workspace_root_test.go index 5e23a1da8..6b37b2c41 100644 --- a/bundle/permissions/workspace_root_test.go +++ b/bundle/permissions/workspace_root_test.go @@ -69,6 +69,6 @@ func TestApplyWorkspaceRootPermissions(t *testing.T) { WorkspaceObjectType: "directories", }).Return(nil, nil) - diags := bundle.Apply(context.Background(), b, ApplyWorkspaceRootPermissions()) - require.NoError(t, diags.Error()) + diags := bundle.Apply(context.Background(), b, bundle.Seq(ValidateSharedRootPermissions(), ApplyWorkspaceRootPermissions())) + require.Empty(t, diags) } diff --git a/bundle/phases/initialize.go b/bundle/phases/initialize.go index da5b2eff6..5582016fd 100644 --- a/bundle/phases/initialize.go +++ b/bundle/phases/initialize.go @@ -76,8 +76,11 @@ func Initialize() bundle.Mutator { mutator.TranslatePaths(), trampoline.WrapperWarning(), + + permissions.ValidateSharedRootPermissions(), permissions.ApplyBundlePermissions(), permissions.FilterCurrentUser(), + metadata.AnnotateJobs(), metadata.AnnotatePipelines(), terraform.Initialize(), From 3b1fb6d2dfd10b3388f8fdc38c9ea611bb5f88ee Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Fri, 18 Oct 2024 17:38:10 +0200 Subject: [PATCH 02/13] Remove Terraform conversion function that's no longer used (#1840) ## Changes In #1218, the `BundleToTerraform` function was replaced by a version based on the dynamic configuration tree. Since then, it has only been used in tests to confirm that the output of the old function was equal to the output of the new function. We no longer need this and can exclusively rely on the version based on the dynamic configuration tree. ## Tests Tests pass. 
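For context, the surviving conversion path goes from the typed configuration through the dynamic value tree. A minimal sketch of that entry point, assuming it lives in the `terraform` package like the tests below, and assuming the `schema` import path matches the internal package already used by `convert.go`:

```go
package terraform

import (
	"context"

	"github.com/databricks/cli/bundle/config"
	"github.com/databricks/cli/bundle/internal/tf/schema"
	"github.com/databricks/cli/libs/dyn"
	"github.com/databricks/cli/libs/dyn/convert"
)

// toTerraformRoot mirrors the produceTerraformConfiguration test helper below:
// typed configuration -> dynamic value tree -> Terraform JSON root.
func toTerraformRoot(ctx context.Context, cfg config.Root) (*schema.Root, error) {
	// Convert the typed configuration into a dynamic value tree.
	vin, err := convert.FromTyped(cfg, dyn.NilValue)
	if err != nil {
		return nil, err
	}

	// Produce the Terraform JSON representation from the dynamic value.
	return BundleToTerraformWithDynValue(ctx, vin)
}
```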
--- bundle/deploy/terraform/convert.go | 240 ------------------------ bundle/deploy/terraform/convert_test.go | 142 +++++++------- 2 files changed, 63 insertions(+), 319 deletions(-) diff --git a/bundle/deploy/terraform/convert.go b/bundle/deploy/terraform/convert.go index b8993c031..0ba8bb1f4 100644 --- a/bundle/deploy/terraform/convert.go +++ b/bundle/deploy/terraform/convert.go @@ -2,9 +2,7 @@ package terraform import ( "context" - "encoding/json" "fmt" - "sort" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/resources" @@ -14,244 +12,6 @@ import ( tfjson "github.com/hashicorp/terraform-json" ) -func conv(from any, to any) { - buf, _ := json.Marshal(from) - json.Unmarshal(buf, &to) -} - -func convPermissions(acl []resources.Permission) *schema.ResourcePermissions { - if len(acl) == 0 { - return nil - } - - resource := schema.ResourcePermissions{} - for _, ac := range acl { - resource.AccessControl = append(resource.AccessControl, convPermission(ac)) - } - - return &resource -} - -func convPermission(ac resources.Permission) schema.ResourcePermissionsAccessControl { - dst := schema.ResourcePermissionsAccessControl{ - PermissionLevel: ac.Level, - } - if ac.UserName != "" { - dst.UserName = ac.UserName - } - if ac.GroupName != "" { - dst.GroupName = ac.GroupName - } - if ac.ServicePrincipalName != "" { - dst.ServicePrincipalName = ac.ServicePrincipalName - } - return dst -} - -func convGrants(acl []resources.Grant) *schema.ResourceGrants { - if len(acl) == 0 { - return nil - } - - resource := schema.ResourceGrants{} - for _, ac := range acl { - resource.Grant = append(resource.Grant, schema.ResourceGrantsGrant{ - Privileges: ac.Privileges, - Principal: ac.Principal, - }) - } - - return &resource -} - -// BundleToTerraform converts resources in a bundle configuration -// to the equivalent Terraform JSON representation. -// -// Note: This function is an older implementation of the conversion logic. It is -// no longer used in any code paths. It is kept around to be used in tests. -// New resources do not need to modify this function and can instead can define -// the conversion login in the tfdyn package. -func BundleToTerraform(config *config.Root) *schema.Root { - tfroot := schema.NewRoot() - tfroot.Provider = schema.NewProviders() - tfroot.Resource = schema.NewResources() - noResources := true - - for k, src := range config.Resources.Jobs { - noResources = false - var dst schema.ResourceJob - conv(src, &dst) - - if src.JobSettings != nil { - sort.Slice(src.JobSettings.Tasks, func(i, j int) bool { - return src.JobSettings.Tasks[i].TaskKey < src.JobSettings.Tasks[j].TaskKey - }) - - for _, v := range src.Tasks { - var t schema.ResourceJobTask - conv(v, &t) - - for _, v_ := range v.Libraries { - var l schema.ResourceJobTaskLibrary - conv(v_, &l) - t.Library = append(t.Library, l) - } - - // Convert for_each_task libraries - if v.ForEachTask != nil { - for _, v_ := range v.ForEachTask.Task.Libraries { - var l schema.ResourceJobTaskForEachTaskTaskLibrary - conv(v_, &l) - t.ForEachTask.Task.Library = append(t.ForEachTask.Task.Library, l) - } - - } - - dst.Task = append(dst.Task, t) - } - - for _, v := range src.JobClusters { - var t schema.ResourceJobJobCluster - conv(v, &t) - dst.JobCluster = append(dst.JobCluster, t) - } - - // Unblock downstream work. To be addressed more generally later. 
- if git := src.GitSource; git != nil { - dst.GitSource = &schema.ResourceJobGitSource{ - Url: git.GitUrl, - Branch: git.GitBranch, - Commit: git.GitCommit, - Provider: string(git.GitProvider), - Tag: git.GitTag, - } - } - - for _, v := range src.Parameters { - var t schema.ResourceJobParameter - conv(v, &t) - dst.Parameter = append(dst.Parameter, t) - } - } - - tfroot.Resource.Job[k] = &dst - - // Configure permissions for this resource. - if rp := convPermissions(src.Permissions); rp != nil { - rp.JobId = fmt.Sprintf("${databricks_job.%s.id}", k) - tfroot.Resource.Permissions["job_"+k] = rp - } - } - - for k, src := range config.Resources.Pipelines { - noResources = false - var dst schema.ResourcePipeline - conv(src, &dst) - - if src.PipelineSpec != nil { - for _, v := range src.Libraries { - var l schema.ResourcePipelineLibrary - conv(v, &l) - dst.Library = append(dst.Library, l) - } - - for _, v := range src.Clusters { - var l schema.ResourcePipelineCluster - conv(v, &l) - dst.Cluster = append(dst.Cluster, l) - } - - for _, v := range src.Notifications { - var l schema.ResourcePipelineNotification - conv(v, &l) - dst.Notification = append(dst.Notification, l) - } - } - - tfroot.Resource.Pipeline[k] = &dst - - // Configure permissions for this resource. - if rp := convPermissions(src.Permissions); rp != nil { - rp.PipelineId = fmt.Sprintf("${databricks_pipeline.%s.id}", k) - tfroot.Resource.Permissions["pipeline_"+k] = rp - } - } - - for k, src := range config.Resources.Models { - noResources = false - var dst schema.ResourceMlflowModel - conv(src, &dst) - tfroot.Resource.MlflowModel[k] = &dst - - // Configure permissions for this resource. - if rp := convPermissions(src.Permissions); rp != nil { - rp.RegisteredModelId = fmt.Sprintf("${databricks_mlflow_model.%s.registered_model_id}", k) - tfroot.Resource.Permissions["mlflow_model_"+k] = rp - } - } - - for k, src := range config.Resources.Experiments { - noResources = false - var dst schema.ResourceMlflowExperiment - conv(src, &dst) - tfroot.Resource.MlflowExperiment[k] = &dst - - // Configure permissions for this resource. - if rp := convPermissions(src.Permissions); rp != nil { - rp.ExperimentId = fmt.Sprintf("${databricks_mlflow_experiment.%s.id}", k) - tfroot.Resource.Permissions["mlflow_experiment_"+k] = rp - } - } - - for k, src := range config.Resources.ModelServingEndpoints { - noResources = false - var dst schema.ResourceModelServing - conv(src, &dst) - tfroot.Resource.ModelServing[k] = &dst - - // Configure permissions for this resource. - if rp := convPermissions(src.Permissions); rp != nil { - rp.ServingEndpointId = fmt.Sprintf("${databricks_model_serving.%s.serving_endpoint_id}", k) - tfroot.Resource.Permissions["model_serving_"+k] = rp - } - } - - for k, src := range config.Resources.RegisteredModels { - noResources = false - var dst schema.ResourceRegisteredModel - conv(src, &dst) - tfroot.Resource.RegisteredModel[k] = &dst - - // Configure permissions for this resource. 
- if rp := convGrants(src.Grants); rp != nil { - rp.Function = fmt.Sprintf("${databricks_registered_model.%s.id}", k) - tfroot.Resource.Grants["registered_model_"+k] = rp - } - } - - for k, src := range config.Resources.QualityMonitors { - noResources = false - var dst schema.ResourceQualityMonitor - conv(src, &dst) - tfroot.Resource.QualityMonitor[k] = &dst - } - - for k, src := range config.Resources.Clusters { - noResources = false - var dst schema.ResourceCluster - conv(src, &dst) - tfroot.Resource.Cluster[k] = &dst - } - - // We explicitly set "resource" to nil to omit it from a JSON encoding. - // This is required because the terraform CLI requires >= 1 resources defined - // if the "resource" property is used in a .tf.json file. - if noResources { - tfroot.Resource = nil - } - return tfroot -} - // BundleToTerraformWithDynValue converts resources in a bundle configuration // to the equivalent Terraform JSON representation. func BundleToTerraformWithDynValue(ctx context.Context, root dyn.Value) (*schema.Root, error) { diff --git a/bundle/deploy/terraform/convert_test.go b/bundle/deploy/terraform/convert_test.go index 4c6866d9d..575ff00bc 100644 --- a/bundle/deploy/terraform/convert_test.go +++ b/bundle/deploy/terraform/convert_test.go @@ -2,7 +2,6 @@ package terraform import ( "context" - "encoding/json" "reflect" "testing" @@ -21,6 +20,27 @@ import ( "github.com/stretchr/testify/require" ) +func produceTerraformConfiguration(t *testing.T, config config.Root) *schema.Root { + vin, err := convert.FromTyped(config, dyn.NilValue) + require.NoError(t, err) + out, err := BundleToTerraformWithDynValue(context.Background(), vin) + require.NoError(t, err) + return out +} + +func convertToResourceStruct[T any](t *testing.T, resource *T, data any) { + require.NotNil(t, resource) + require.NotNil(t, data) + + // Convert data to a dyn.Value. + vin, err := convert.FromTyped(data, dyn.NilValue) + require.NoError(t, err) + + // Convert the dyn.Value to a struct. 
+ err = convert.ToTyped(resource, vin) + require.NoError(t, err) +} + func TestBundleToTerraformJob(t *testing.T) { var src = resources.Job{ JobSettings: &jobs.JobSettings{ @@ -58,8 +78,9 @@ func TestBundleToTerraformJob(t *testing.T) { }, } - out := BundleToTerraform(&config) - resource := out.Resource.Job["my_job"].(*schema.ResourceJob) + var resource schema.ResourceJob + out := produceTerraformConfiguration(t, config) + convertToResourceStruct(t, &resource, out.Resource.Job["my_job"]) assert.Equal(t, "my job", resource.Name) assert.Len(t, resource.JobCluster, 1) @@ -68,8 +89,6 @@ func TestBundleToTerraformJob(t *testing.T) { assert.Equal(t, "param1", resource.Parameter[0].Name) assert.Equal(t, "param2", resource.Parameter[1].Name) assert.Nil(t, out.Data) - - bundleToTerraformEquivalenceTest(t, &config) } func TestBundleToTerraformJobPermissions(t *testing.T) { @@ -90,15 +109,14 @@ func TestBundleToTerraformJobPermissions(t *testing.T) { }, } - out := BundleToTerraform(&config) - resource := out.Resource.Permissions["job_my_job"].(*schema.ResourcePermissions) + var resource schema.ResourcePermissions + out := produceTerraformConfiguration(t, config) + convertToResourceStruct(t, &resource, out.Resource.Permissions["job_my_job"]) assert.NotEmpty(t, resource.JobId) assert.Len(t, resource.AccessControl, 1) assert.Equal(t, "jane@doe.com", resource.AccessControl[0].UserName) assert.Equal(t, "CAN_VIEW", resource.AccessControl[0].PermissionLevel) - - bundleToTerraformEquivalenceTest(t, &config) } func TestBundleToTerraformJobTaskLibraries(t *testing.T) { @@ -128,15 +146,14 @@ func TestBundleToTerraformJobTaskLibraries(t *testing.T) { }, } - out := BundleToTerraform(&config) - resource := out.Resource.Job["my_job"].(*schema.ResourceJob) + var resource schema.ResourceJob + out := produceTerraformConfiguration(t, config) + convertToResourceStruct(t, &resource, out.Resource.Job["my_job"]) assert.Equal(t, "my job", resource.Name) require.Len(t, resource.Task, 1) require.Len(t, resource.Task[0].Library, 1) assert.Equal(t, "mlflow", resource.Task[0].Library[0].Pypi.Package) - - bundleToTerraformEquivalenceTest(t, &config) } func TestBundleToTerraformForEachTaskLibraries(t *testing.T) { @@ -172,15 +189,14 @@ func TestBundleToTerraformForEachTaskLibraries(t *testing.T) { }, } - out := BundleToTerraform(&config) - resource := out.Resource.Job["my_job"].(*schema.ResourceJob) + var resource schema.ResourceJob + out := produceTerraformConfiguration(t, config) + convertToResourceStruct(t, &resource, out.Resource.Job["my_job"]) assert.Equal(t, "my job", resource.Name) require.Len(t, resource.Task, 1) require.Len(t, resource.Task[0].ForEachTask.Task.Library, 1) assert.Equal(t, "mlflow", resource.Task[0].ForEachTask.Task.Library[0].Pypi.Package) - - bundleToTerraformEquivalenceTest(t, &config) } func TestBundleToTerraformPipeline(t *testing.T) { @@ -230,8 +246,9 @@ func TestBundleToTerraformPipeline(t *testing.T) { }, } - out := BundleToTerraform(&config) - resource := out.Resource.Pipeline["my_pipeline"].(*schema.ResourcePipeline) + var resource schema.ResourcePipeline + out := produceTerraformConfiguration(t, config) + convertToResourceStruct(t, &resource, out.Resource.Pipeline["my_pipeline"]) assert.Equal(t, "my pipeline", resource.Name) assert.Len(t, resource.Library, 2) @@ -241,8 +258,6 @@ func TestBundleToTerraformPipeline(t *testing.T) { assert.Equal(t, resource.Notification[1].Alerts, []string{"on-update-failure", "on-flow-failure"}) assert.Equal(t, resource.Notification[1].EmailRecipients, 
[]string{"jane@doe.com", "john@doe.com"}) assert.Nil(t, out.Data) - - bundleToTerraformEquivalenceTest(t, &config) } func TestBundleToTerraformPipelinePermissions(t *testing.T) { @@ -263,15 +278,14 @@ func TestBundleToTerraformPipelinePermissions(t *testing.T) { }, } - out := BundleToTerraform(&config) - resource := out.Resource.Permissions["pipeline_my_pipeline"].(*schema.ResourcePermissions) + var resource schema.ResourcePermissions + out := produceTerraformConfiguration(t, config) + convertToResourceStruct(t, &resource, out.Resource.Permissions["pipeline_my_pipeline"]) assert.NotEmpty(t, resource.PipelineId) assert.Len(t, resource.AccessControl, 1) assert.Equal(t, "jane@doe.com", resource.AccessControl[0].UserName) assert.Equal(t, "CAN_VIEW", resource.AccessControl[0].PermissionLevel) - - bundleToTerraformEquivalenceTest(t, &config) } func TestBundleToTerraformModel(t *testing.T) { @@ -300,8 +314,9 @@ func TestBundleToTerraformModel(t *testing.T) { }, } - out := BundleToTerraform(&config) - resource := out.Resource.MlflowModel["my_model"].(*schema.ResourceMlflowModel) + var resource schema.ResourceMlflowModel + out := produceTerraformConfiguration(t, config) + convertToResourceStruct(t, &resource, out.Resource.MlflowModel["my_model"]) assert.Equal(t, "name", resource.Name) assert.Equal(t, "description", resource.Description) @@ -311,8 +326,6 @@ func TestBundleToTerraformModel(t *testing.T) { assert.Equal(t, "k2", resource.Tags[1].Key) assert.Equal(t, "v2", resource.Tags[1].Value) assert.Nil(t, out.Data) - - bundleToTerraformEquivalenceTest(t, &config) } func TestBundleToTerraformModelPermissions(t *testing.T) { @@ -336,15 +349,14 @@ func TestBundleToTerraformModelPermissions(t *testing.T) { }, } - out := BundleToTerraform(&config) - resource := out.Resource.Permissions["mlflow_model_my_model"].(*schema.ResourcePermissions) + var resource schema.ResourcePermissions + out := produceTerraformConfiguration(t, config) + convertToResourceStruct(t, &resource, out.Resource.Permissions["mlflow_model_my_model"]) assert.NotEmpty(t, resource.RegisteredModelId) assert.Len(t, resource.AccessControl, 1) assert.Equal(t, "jane@doe.com", resource.AccessControl[0].UserName) assert.Equal(t, "CAN_READ", resource.AccessControl[0].PermissionLevel) - - bundleToTerraformEquivalenceTest(t, &config) } func TestBundleToTerraformExperiment(t *testing.T) { @@ -362,13 +374,12 @@ func TestBundleToTerraformExperiment(t *testing.T) { }, } - out := BundleToTerraform(&config) - resource := out.Resource.MlflowExperiment["my_experiment"].(*schema.ResourceMlflowExperiment) + var resource schema.ResourceMlflowExperiment + out := produceTerraformConfiguration(t, config) + convertToResourceStruct(t, &resource, out.Resource.MlflowExperiment["my_experiment"]) assert.Equal(t, "name", resource.Name) assert.Nil(t, out.Data) - - bundleToTerraformEquivalenceTest(t, &config) } func TestBundleToTerraformExperimentPermissions(t *testing.T) { @@ -392,15 +403,14 @@ func TestBundleToTerraformExperimentPermissions(t *testing.T) { }, } - out := BundleToTerraform(&config) - resource := out.Resource.Permissions["mlflow_experiment_my_experiment"].(*schema.ResourcePermissions) + var resource schema.ResourcePermissions + out := produceTerraformConfiguration(t, config) + convertToResourceStruct(t, &resource, out.Resource.Permissions["mlflow_experiment_my_experiment"]) assert.NotEmpty(t, resource.ExperimentId) assert.Len(t, resource.AccessControl, 1) assert.Equal(t, "jane@doe.com", resource.AccessControl[0].UserName) assert.Equal(t, "CAN_READ", 
resource.AccessControl[0].PermissionLevel) - - bundleToTerraformEquivalenceTest(t, &config) } func TestBundleToTerraformModelServing(t *testing.T) { @@ -436,8 +446,9 @@ func TestBundleToTerraformModelServing(t *testing.T) { }, } - out := BundleToTerraform(&config) - resource := out.Resource.ModelServing["my_model_serving_endpoint"].(*schema.ResourceModelServing) + var resource schema.ResourceModelServing + out := produceTerraformConfiguration(t, config) + convertToResourceStruct(t, &resource, out.Resource.ModelServing["my_model_serving_endpoint"]) assert.Equal(t, "name", resource.Name) assert.Equal(t, "model_name", resource.Config.ServedModels[0].ModelName) @@ -447,8 +458,6 @@ func TestBundleToTerraformModelServing(t *testing.T) { assert.Equal(t, "model_name-1", resource.Config.TrafficConfig.Routes[0].ServedModelName) assert.Equal(t, 100, resource.Config.TrafficConfig.Routes[0].TrafficPercentage) assert.Nil(t, out.Data) - - bundleToTerraformEquivalenceTest(t, &config) } func TestBundleToTerraformModelServingPermissions(t *testing.T) { @@ -490,15 +499,14 @@ func TestBundleToTerraformModelServingPermissions(t *testing.T) { }, } - out := BundleToTerraform(&config) - resource := out.Resource.Permissions["model_serving_my_model_serving_endpoint"].(*schema.ResourcePermissions) + var resource schema.ResourcePermissions + out := produceTerraformConfiguration(t, config) + convertToResourceStruct(t, &resource, out.Resource.Permissions["model_serving_my_model_serving_endpoint"]) assert.NotEmpty(t, resource.ServingEndpointId) assert.Len(t, resource.AccessControl, 1) assert.Equal(t, "jane@doe.com", resource.AccessControl[0].UserName) assert.Equal(t, "CAN_VIEW", resource.AccessControl[0].PermissionLevel) - - bundleToTerraformEquivalenceTest(t, &config) } func TestBundleToTerraformRegisteredModel(t *testing.T) { @@ -519,16 +527,15 @@ func TestBundleToTerraformRegisteredModel(t *testing.T) { }, } - out := BundleToTerraform(&config) - resource := out.Resource.RegisteredModel["my_registered_model"].(*schema.ResourceRegisteredModel) + var resource schema.ResourceRegisteredModel + out := produceTerraformConfiguration(t, config) + convertToResourceStruct(t, &resource, out.Resource.RegisteredModel["my_registered_model"]) assert.Equal(t, "name", resource.Name) assert.Equal(t, "catalog", resource.CatalogName) assert.Equal(t, "schema", resource.SchemaName) assert.Equal(t, "comment", resource.Comment) assert.Nil(t, out.Data) - - bundleToTerraformEquivalenceTest(t, &config) } func TestBundleToTerraformRegisteredModelGrants(t *testing.T) { @@ -554,15 +561,14 @@ func TestBundleToTerraformRegisteredModelGrants(t *testing.T) { }, } - out := BundleToTerraform(&config) - resource := out.Resource.Grants["registered_model_my_registered_model"].(*schema.ResourceGrants) + var resource schema.ResourceGrants + out := produceTerraformConfiguration(t, config) + convertToResourceStruct(t, &resource, out.Resource.Grants["registered_model_my_registered_model"]) assert.NotEmpty(t, resource.Function) assert.Len(t, resource.Grant, 1) assert.Equal(t, "jane@doe.com", resource.Grant[0].Principal) assert.Equal(t, "EXECUTE", resource.Grant[0].Privileges[0]) - - bundleToTerraformEquivalenceTest(t, &config) } func TestBundleToTerraformDeletedResources(t *testing.T) { @@ -1154,25 +1160,3 @@ func AssertFullResourceCoverage(t *testing.T, config *config.Root) { } } } - -func assertEqualTerraformRoot(t *testing.T, a, b *schema.Root) { - ba, err := json.Marshal(a) - require.NoError(t, err) - bb, err := json.Marshal(b) - require.NoError(t, err) - 
assert.JSONEq(t, string(ba), string(bb)) -} - -func bundleToTerraformEquivalenceTest(t *testing.T, config *config.Root) { - t.Run("dyn equivalence", func(t *testing.T) { - tf1 := BundleToTerraform(config) - - vin, err := convert.FromTyped(config, dyn.NilValue) - require.NoError(t, err) - tf2, err := BundleToTerraformWithDynValue(context.Background(), vin) - require.NoError(t, err) - - // Compare roots - assertEqualTerraformRoot(t, tf1, tf2) - }) -} From c9770503f8598ec9d6aeaafddca8c50003f96eb2 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Fri, 18 Oct 2024 17:42:05 +0200 Subject: [PATCH 03/13] Encode assumptions about the dashboards API in a test (#1839) ## Changes Dashboards can be imported either via its own CRUD API, or via the workspace import API. This test encodes the assumptions we can make about the API behavior. More specifically, the identity of the dashboard is retained on workspace import, as are the properties that aren't surfaced in the workspace import API. ## Tests The integration test passes. --- internal/acc/workspace.go | 30 +++++++ internal/dashboard_assumptions_test.go | 110 +++++++++++++++++++++++++ 2 files changed, 140 insertions(+) create mode 100644 internal/dashboard_assumptions_test.go diff --git a/internal/acc/workspace.go b/internal/acc/workspace.go index 39374f229..69ab0e715 100644 --- a/internal/acc/workspace.go +++ b/internal/acc/workspace.go @@ -2,11 +2,14 @@ package acc import ( "context" + "fmt" "os" "testing" "github.com/databricks/databricks-sdk-go" + "github.com/databricks/databricks-sdk-go/apierr" "github.com/databricks/databricks-sdk-go/service/compute" + "github.com/databricks/databricks-sdk-go/service/workspace" "github.com/stretchr/testify/require" ) @@ -94,3 +97,30 @@ func (t *WorkspaceT) RunPython(code string) (string, error) { require.True(t, ok, "unexpected type %T", results.Data) return output, nil } + +func (t *WorkspaceT) TemporaryWorkspaceDir(name ...string) string { + ctx := context.Background() + me, err := t.W.CurrentUser.Me(ctx) + require.NoError(t, err) + + basePath := fmt.Sprintf("/Users/%s/%s", me.UserName, RandomName(name...)) + + t.Logf("Creating %s", basePath) + err = t.W.Workspace.MkdirsByPath(ctx, basePath) + require.NoError(t, err) + + // Remove test directory on test completion. + t.Cleanup(func() { + t.Logf("Removing %s", basePath) + err := t.W.Workspace.Delete(ctx, workspace.Delete{ + Path: basePath, + Recursive: true, + }) + if err == nil || apierr.IsMissing(err) { + return + } + t.Logf("Unable to remove temporary workspace directory %s: %#v", basePath, err) + }) + + return basePath +} diff --git a/internal/dashboard_assumptions_test.go b/internal/dashboard_assumptions_test.go new file mode 100644 index 000000000..912e046b5 --- /dev/null +++ b/internal/dashboard_assumptions_test.go @@ -0,0 +1,110 @@ +package internal + +import ( + "encoding/base64" + "testing" + + "github.com/databricks/cli/internal/acc" + "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/dyn/convert" + "github.com/databricks/cli/libs/dyn/merge" + "github.com/databricks/databricks-sdk-go/apierr" + "github.com/databricks/databricks-sdk-go/service/dashboards" + "github.com/databricks/databricks-sdk-go/service/workspace" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// Verify that importing a dashboard through the Workspace API retains the identity of the underying resource, +// as well as properties exclusively accessible through the dashboards API. 
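+// The test below creates the dashboard via the Lakeview API, overwrites it via
+// the workspace import API, and asserts that only the `etag` and `update_time`
+// fields of the dashboard object change as a result.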
+func TestAccDashboardAssumptions_WorkspaceImport(t *testing.T) { + ctx, wt := acc.WorkspaceTest(t) + + t.Parallel() + + dashboardName := "New Dashboard" + dashboardPayload := []byte(`{"pages":[{"name":"2506f97a","displayName":"New Page"}]}`) + warehouseId := acc.GetEnvOrSkipTest(t, "TEST_DEFAULT_WAREHOUSE_ID") + + dir := wt.TemporaryWorkspaceDir("dashboard-assumptions-") + + dashboard, err := wt.W.Lakeview.Create(ctx, dashboards.CreateDashboardRequest{ + DisplayName: dashboardName, + ParentPath: dir, + SerializedDashboard: string(dashboardPayload), + WarehouseId: warehouseId, + }) + require.NoError(t, err) + t.Logf("Dashboard ID (per Lakeview API): %s", dashboard.DashboardId) + + // Overwrite the dashboard via the workspace API. + { + err := wt.W.Workspace.Import(ctx, workspace.Import{ + Format: workspace.ImportFormatAuto, + Path: dashboard.Path, + Content: base64.StdEncoding.EncodeToString(dashboardPayload), + Overwrite: true, + }) + require.NoError(t, err) + } + + // Cross-check consistency with the workspace object. + { + obj, err := wt.W.Workspace.GetStatusByPath(ctx, dashboard.Path) + require.NoError(t, err) + + // Confirm that the resource ID included in the response is equal to the dashboard ID. + require.Equal(t, dashboard.DashboardId, obj.ResourceId) + t.Logf("Dashboard ID (per workspace object status): %s", obj.ResourceId) + } + + // Try to overwrite the dashboard via the Lakeview API (and expect failure). + { + _, err := wt.W.Lakeview.Create(ctx, dashboards.CreateDashboardRequest{ + DisplayName: dashboardName, + ParentPath: dir, + SerializedDashboard: string(dashboardPayload), + }) + require.ErrorIs(t, err, apierr.ErrResourceAlreadyExists) + } + + // Retrieve the dashboard object and confirm that only select fields were updated by the import. + { + previousDashboard := dashboard + currentDashboard, err := wt.W.Lakeview.Get(ctx, dashboards.GetDashboardRequest{ + DashboardId: dashboard.DashboardId, + }) + require.NoError(t, err) + + // Convert the dashboard object to a [dyn.Value] to make comparison easier. + previous, err := convert.FromTyped(previousDashboard, dyn.NilValue) + require.NoError(t, err) + current, err := convert.FromTyped(currentDashboard, dyn.NilValue) + require.NoError(t, err) + + // Collect updated paths. + var updatedFieldPaths []string + _, err = merge.Override(previous, current, merge.OverrideVisitor{ + VisitDelete: func(basePath dyn.Path, left dyn.Value) error { + assert.Fail(t, "unexpected delete operation") + return nil + }, + VisitInsert: func(basePath dyn.Path, right dyn.Value) (dyn.Value, error) { + assert.Fail(t, "unexpected insert operation") + return right, nil + }, + VisitUpdate: func(basePath dyn.Path, left dyn.Value, right dyn.Value) (dyn.Value, error) { + updatedFieldPaths = append(updatedFieldPaths, basePath.String()) + return right, nil + }, + }) + require.NoError(t, err) + + // Confirm that only the expected fields have been updated. + assert.ElementsMatch(t, []string{ + "etag", + "update_time", + }, updatedFieldPaths) + } +} From eefda8c198c939098f90fa4abfa9c2f7f7b67590 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Fri, 18 Oct 2024 17:46:39 +0200 Subject: [PATCH 04/13] Fix path to repository-wide exclude file (#1837) ## Changes This file is at `info/exclude`, and not `info/excludes`. Also see https://git-scm.com/docs/gitignore. ## Tests Manually confirmed that these ignore patterns are now picked up. I created a repository with a pattern in this file and ran `sync` to confirm it ignores files matching the pattern. 
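For illustration, a minimal sketch of how the fix surfaces through this package, using the `NewRepository` and `Ignore` signatures as they appear elsewhere in this series; the checkout path and pattern are hypothetical:

```go
package main

import (
	"fmt"

	"github.com/databricks/cli/libs/git"
	"github.com/databricks/cli/libs/vfs"
)

func main() {
	// Open a checkout whose .git/info/exclude contains the pattern "*.scratch".
	repo, err := git.NewRepository(vfs.MustNew("/path/to/checkout"))
	if err != nil {
		panic(err)
	}

	// With the corrected filename (exclude, not excludes), the pattern is honored.
	ignored, err := repo.Ignore("notes.scratch")
	if err != nil {
		panic(err)
	}
	fmt.Println(ignored) // true
}
```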
--- libs/git/repository.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libs/git/repository.go b/libs/git/repository.go index 6940ddac8..82ee7987b 100644 --- a/libs/git/repository.go +++ b/libs/git/repository.go @@ -252,8 +252,8 @@ func NewRepository(path vfs.Path) (*Repository, error) { newStringIgnoreRules([]string{ ".git", }), - // Load repository-wide excludes file. - repo.newIgnoreFile(".git/info/excludes"), + // Load repository-wide exclude file. + repo.newIgnoreFile(".git/info/exclude"), // Load root gitignore file. repo.newIgnoreFile(".gitignore"), } From ffdbec87cc5976587e73fd836a7fa8d1eaf6c6c3 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Mon, 21 Oct 2024 13:45:39 +0200 Subject: [PATCH 05/13] Added support for pip options in environment dependencies (#1842) ## Changes Added support for specifying pip options such as `--extra-index-url` and etc. in environments dependencies ``` environments: - environment_key: Default spec: client: "1" dependencies: - --extra-index-url https://foo@bar.com/packages/smth somepackage - json==1.0.0 ``` ## Tests Added regression test --------- Co-authored-by: Pieter Noordhuis --- bundle/config/mutator/translate_paths_test.go | 6 ++++++ bundle/libraries/local_path.go | 11 +++++++++++ 2 files changed, 17 insertions(+) diff --git a/bundle/config/mutator/translate_paths_test.go b/bundle/config/mutator/translate_paths_test.go index c03cee73e..9d655b27b 100644 --- a/bundle/config/mutator/translate_paths_test.go +++ b/bundle/config/mutator/translate_paths_test.go @@ -699,6 +699,9 @@ func TestTranslatePathJobEnvironments(t *testing.T) { "../dist/env2.whl", "simplejson", "/Workspace/Users/foo@bar.com/test.whl", + "--extra-index-url https://name:token@gitlab.com/api/v4/projects/9876/packages/pypi/simple foobar", + "foobar --extra-index-url https://name:token@gitlab.com/api/v4/projects/9876/packages/pypi/simple", + "https://foo@bar.com/packages/pypi/simple", }, }, }, @@ -719,6 +722,9 @@ func TestTranslatePathJobEnvironments(t *testing.T) { assert.Equal(t, strings.Join([]string{".", "dist", "env2.whl"}, string(os.PathSeparator)), b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[1]) assert.Equal(t, "simplejson", b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[2]) assert.Equal(t, "/Workspace/Users/foo@bar.com/test.whl", b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[3]) + assert.Equal(t, "--extra-index-url https://name:token@gitlab.com/api/v4/projects/9876/packages/pypi/simple foobar", b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[4]) + assert.Equal(t, "foobar --extra-index-url https://name:token@gitlab.com/api/v4/projects/9876/packages/pypi/simple", b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[5]) + assert.Equal(t, "https://foo@bar.com/packages/pypi/simple", b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[6]) } func TestTranslatePathWithComplexVariables(t *testing.T) { diff --git a/bundle/libraries/local_path.go b/bundle/libraries/local_path.go index 6d60d56bc..53b714104 100644 --- a/bundle/libraries/local_path.go +++ b/bundle/libraries/local_path.go @@ -57,6 +57,12 @@ func IsLibraryLocal(dep string) bool { } } + // If the dependency starts with --, it's a pip flag option which is a valid + // entry for environment dependencies but not a local path + if containsPipFlag(dep) { + return false + } + // If the dependency is a requirements file, it's not a valid 
local path if strings.HasPrefix(dep, "-r") { return false @@ -70,6 +76,11 @@ func IsLibraryLocal(dep string) bool { return IsLocalPath(dep) } +func containsPipFlag(input string) bool { + re := regexp.MustCompile(`--[a-zA-Z0-9-]+`) + return re.MatchString(input) +} + // ^[a-zA-Z0-9\-_]+: Matches the package name, allowing alphanumeric characters, dashes (-), and underscores (_). // \[.*\])?: Optionally matches any extras specified in square brackets, e.g., [security]. // ((==|!=|<=|>=|~=|>|<)\d+(\.\d+){0,2}(\.\*)?): Optionally matches version specifiers, supporting various operators (==, !=, etc.) followed by a version number (e.g., 2.25.1). From ca45e53f42c5c4b26f2833554ab7118802c017cb Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Mon, 21 Oct 2024 19:56:17 +0200 Subject: [PATCH 06/13] Add script to make testing of code on branches easier (#1844) ## Changes Convenience script to exec into a shell where a CLI build for a specific branch is made available. ## Tests Manually from `/tmp` with `bash <([path]) demo-dashboards`. --- internal/bugbash/README.md | 13 ++++ internal/bugbash/exec.sh | 139 +++++++++++++++++++++++++++++++++++++ 2 files changed, 152 insertions(+) create mode 100644 internal/bugbash/README.md create mode 100755 internal/bugbash/exec.sh diff --git a/internal/bugbash/README.md b/internal/bugbash/README.md new file mode 100644 index 000000000..941ab6227 --- /dev/null +++ b/internal/bugbash/README.md @@ -0,0 +1,13 @@ +# Bugbash + +The script in this directory can be used to conveniently exec into a shell +where a CLI build for a specific branch is made available. + +## Usage + +This script prompts if you do NOT have at least Bash 5 installed, +but works without command completion with earlier versions. + +```shell +bash <(curl -fsSL https://raw.githubusercontent.com/databricks/cli/main/internal/bugbash/exec.sh) my-branch +``` diff --git a/internal/bugbash/exec.sh b/internal/bugbash/exec.sh new file mode 100755 index 000000000..ac25b16ed --- /dev/null +++ b/internal/bugbash/exec.sh @@ -0,0 +1,139 @@ +#!/usr/bin/env bash + +set -euo pipefail + +# Set the GitHub repository for the Databricks CLI. +export GH_REPO="databricks/cli" + +# Synthesize the directory name for the snapshot build. +function cli_snapshot_directory() { + dir="cli" + + # Append OS + case "$(uname -s)" in + Linux) + dir="${dir}_linux" + ;; + Darwin) + dir="${dir}_darwin" + ;; + *) + echo "Unknown operating system: $os" + ;; + esac + + # Append architecture + case "$(uname -m)" in + x86_64) + dir="${dir}_amd64_v1" + ;; + i386|i686) + dir="${dir}_386" + ;; + arm64|aarch64) + dir="${dir}_arm64" + ;; + armv7l|armv8l) + dir="${dir}_arm_6" + ;; + *) + echo "Unknown architecture: $arch" + ;; + esac + + echo $dir +} + +BRANCH=$1 +shift + +# Default to main branch if branch is not specified. +if [ -z "$BRANCH" ]; then + BRANCH=main +fi + +if [ -z "$BRANCH" ]; then + echo "Please specify which branch to bugbash..." + exit 1 +fi + +# Check if the "gh" command is available. +if ! command -v gh &> /dev/null; then + echo "The GitHub CLI (gh) is required to download the snapshot build." + echo "Install and configure it with:" + echo "" + echo " brew install gh" + echo " gh auth login" + echo "" + exit 1 +fi + +echo "Looking for a snapshot build of the Databricks CLI on branch $BRANCH..." + +# Find last successful build on $BRANCH. 
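+# The gh/jq pipeline below lists recent runs of the release-snapshot workflow on
+# this branch and keeps the database ID of the newest run that concluded successfully.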
+last_successful_run_id=$( + gh run list -b "$BRANCH" -w release-snapshot --json 'databaseId,conclusion' | + jq 'limit(1; .[] | select(.conclusion == "success")) | .databaseId' +) +if [ -z "$last_successful_run_id" ]; then + echo "Unable to find last successful build of the release-snapshot workflow for branch $BRANCH." + exit 1 +fi + +# Determine artifact name with the right binaries for this runner. +case "$(uname -s)" in +Linux) + artifact="cli_linux_snapshot" + ;; +Darwin) + artifact="cli_darwin_snapshot" + ;; +esac + +# Create a temporary directory to download the artifact. +dir=$(mktemp -d) + +# Download the artifact. +echo "Downloading the snapshot build..." +gh run download "$last_successful_run_id" -n "$artifact" -D "$dir/.bin" +dir="$dir/.bin/$(cli_snapshot_directory)" +if [ ! -d "$dir" ]; then + echo "Directory does not exist: $dir" + exit 1 +fi + +# Make CLI available on $PATH. +chmod +x "$dir/databricks" +export PATH="$dir:$PATH" + +# Set the prompt to indicate the bugbash environment and exec. +export PS1="(bugbash $BRANCH) \[\033[01;32m\]\u@\h\[\033[00m\]:\[\033[01;34m\]\w\[\033[00m\]\$ " + +# Display completion instructions. +echo "" +echo "==================================================================" + +if [[ ${BASH_VERSINFO[0]} -lt 5 ]]; then + echo -en "\033[31m" + echo "You have Bash version < 5 installed... completion won't work." + echo -en "\033[0m" + echo "" + echo "Install it with:" + echo "" + echo " brew install bash bash-completion" + echo "" + echo "==================================================================" +fi + +echo "" +echo "To load completions in your current shell session:" +echo "" +echo " source /opt/homebrew/etc/profile.d/bash_completion.sh" +echo " source <(databricks completion bash)" +echo "" +echo "==================================================================" +echo "" + +# Exec into a new shell. +# Note: don't use zsh because on macOS it _always_ overwrites PS1. +exec /usr/bin/env bash From 571076d5e1a637e4a6d4f2873665f1e4a0cb4c7f Mon Sep 17 00:00:00 2001 From: Stephen Macke Date: Mon, 21 Oct 2024 11:27:07 -0700 Subject: [PATCH 07/13] Support Git worktrees for `sync` (#1831) ## Changes This change allows the `sync` command to work from [git worktrees](https://git-scm.com/docs/git-worktree). ## Tests * Added unit tests for traversal of worktree related files. * Manually confirmed that synchronization of files from a main checkout, as well as a worktree, observed the same ignore rules (both locally defined as well as from `$GIT_DIR/info/exclude`). --------- Co-authored-by: Pieter Noordhuis --- libs/git/repository.go | 64 ++++++++++++-------- libs/git/view.go | 2 +- libs/git/worktree.go | 123 ++++++++++++++++++++++++++++++++++++++ libs/git/worktree_test.go | 108 +++++++++++++++++++++++++++++++++ 4 files changed, 271 insertions(+), 26 deletions(-) create mode 100644 libs/git/worktree.go create mode 100644 libs/git/worktree_test.go diff --git a/libs/git/repository.go b/libs/git/repository.go index 82ee7987b..0bbd57865 100644 --- a/libs/git/repository.go +++ b/libs/git/repository.go @@ -23,8 +23,21 @@ type Repository struct { // directory where we process .gitignore files. real bool - // root is the absolute path to the repository root. - root vfs.Path + // rootDir is the path to the root of the repository checkout. + // This can be either the main repository checkout or a worktree checkout. + // For more information about worktrees, see: https://git-scm.com/docs/git-worktree#_description. 
+ rootDir vfs.Path + + // gitDir is the equivalent of $GIT_DIR and points to the + // `.git` directory of a repository or a worktree directory. + // See https://git-scm.com/docs/git-worktree#_details for more information. + gitDir vfs.Path + + // gitCommonDir is the equivalent of $GIT_COMMON_DIR and points to the + // `.git` directory of the main working tree (common between worktrees). + // This is equivalent to [gitDir] if this is the main working tree. + // See https://git-scm.com/docs/git-worktree#_details for more information. + gitCommonDir vfs.Path // ignore contains a list of ignore patterns indexed by the // path prefix relative to the repository root. @@ -44,12 +57,11 @@ type Repository struct { // Root returns the absolute path to the repository root. func (r *Repository) Root() string { - return r.root.Native() + return r.rootDir.Native() } func (r *Repository) CurrentBranch() (string, error) { - // load .git/HEAD - ref, err := LoadReferenceFile(r.root, path.Join(GitDirectoryName, "HEAD")) + ref, err := LoadReferenceFile(r.gitDir, "HEAD") if err != nil { return "", err } @@ -65,8 +77,7 @@ func (r *Repository) CurrentBranch() (string, error) { } func (r *Repository) LatestCommit() (string, error) { - // load .git/HEAD - ref, err := LoadReferenceFile(r.root, path.Join(GitDirectoryName, "HEAD")) + ref, err := LoadReferenceFile(r.gitDir, "HEAD") if err != nil { return "", err } @@ -80,12 +91,12 @@ func (r *Repository) LatestCommit() (string, error) { return ref.Content, nil } - // read reference from .git/HEAD + // Read reference from $GIT_DIR/HEAD branchHeadPath, err := ref.ResolvePath() if err != nil { return "", err } - branchHeadRef, err := LoadReferenceFile(r.root, path.Join(GitDirectoryName, branchHeadPath)) + branchHeadRef, err := LoadReferenceFile(r.gitCommonDir, branchHeadPath) if err != nil { return "", err } @@ -125,7 +136,7 @@ func (r *Repository) loadConfig() error { if err != nil { return fmt.Errorf("unable to load user specific gitconfig: %w", err) } - err = config.loadFile(r.root, ".git/config") + err = config.loadFile(r.gitCommonDir, "config") if err != nil { return fmt.Errorf("unable to load repository specific gitconfig: %w", err) } @@ -133,12 +144,6 @@ func (r *Repository) loadConfig() error { return nil } -// newIgnoreFile constructs a new [ignoreRules] implementation backed by -// a file using the specified path relative to the repository root. -func (r *Repository) newIgnoreFile(relativeIgnoreFilePath string) ignoreRules { - return newIgnoreFile(r.root, relativeIgnoreFilePath) -} - // getIgnoreRules returns a slice of [ignoreRules] that apply // for the specified prefix. The prefix must be cleaned by the caller. // It lazily initializes an entry for the specified prefix if it @@ -149,7 +154,7 @@ func (r *Repository) getIgnoreRules(prefix string) []ignoreRules { return fs } - r.ignore[prefix] = append(r.ignore[prefix], r.newIgnoreFile(path.Join(prefix, gitIgnoreFileName))) + r.ignore[prefix] = append(r.ignore[prefix], newIgnoreFile(r.rootDir, path.Join(prefix, gitIgnoreFileName))) return r.ignore[prefix] } @@ -205,21 +210,30 @@ func (r *Repository) Ignore(relPath string) (bool, error) { func NewRepository(path vfs.Path) (*Repository, error) { real := true - rootPath, err := vfs.FindLeafInTree(path, GitDirectoryName) + rootDir, err := vfs.FindLeafInTree(path, GitDirectoryName) if err != nil { if !errors.Is(err, fs.ErrNotExist) { return nil, err } // Cannot find `.git` directory. - // Treat the specified path as a potential repository root. 
+ // Treat the specified path as a potential repository root checkout. real = false - rootPath = path + rootDir = path + } + + // Derive $GIT_DIR and $GIT_COMMON_DIR paths if this is a real repository. + // If it isn't a real repository, they'll point to the (non-existent) `.git` directory. + gitDir, gitCommonDir, err := resolveGitDirs(rootDir) + if err != nil { + return nil, err } repo := &Repository{ - real: real, - root: rootPath, - ignore: make(map[string][]ignoreRules), + real: real, + rootDir: rootDir, + gitDir: gitDir, + gitCommonDir: gitCommonDir, + ignore: make(map[string][]ignoreRules), } err = repo.loadConfig() @@ -253,9 +267,9 @@ func NewRepository(path vfs.Path) (*Repository, error) { ".git", }), // Load repository-wide exclude file. - repo.newIgnoreFile(".git/info/exclude"), + newIgnoreFile(repo.gitCommonDir, "info/exclude"), // Load root gitignore file. - repo.newIgnoreFile(".gitignore"), + newIgnoreFile(repo.rootDir, ".gitignore"), } return repo, nil diff --git a/libs/git/view.go b/libs/git/view.go index 90eed0bb8..2d2e39a60 100644 --- a/libs/git/view.go +++ b/libs/git/view.go @@ -80,7 +80,7 @@ func NewView(root vfs.Path) (*View, error) { // Target path must be relative to the repository root path. target := root.Native() - prefix := repo.root.Native() + prefix := repo.rootDir.Native() if !strings.HasPrefix(target, prefix) { return nil, fmt.Errorf("path %q is not within repository root %q", root.Native(), prefix) } diff --git a/libs/git/worktree.go b/libs/git/worktree.go new file mode 100644 index 000000000..964c1c95b --- /dev/null +++ b/libs/git/worktree.go @@ -0,0 +1,123 @@ +package git + +import ( + "bufio" + "errors" + "fmt" + "io/fs" + "path/filepath" + "strings" + + "github.com/databricks/cli/libs/vfs" +) + +func readLines(root vfs.Path, name string) ([]string, error) { + file, err := root.Open(name) + if err != nil { + return nil, err + } + + defer file.Close() + + var lines []string + scanner := bufio.NewScanner(file) + for scanner.Scan() { + lines = append(lines, scanner.Text()) + } + + return lines, scanner.Err() +} + +// readGitDir reads the value of the `.git` file in a worktree. +func readGitDir(root vfs.Path) (string, error) { + lines, err := readLines(root, GitDirectoryName) + if err != nil { + return "", err + } + + var gitDir string + for _, line := range lines { + parts := strings.SplitN(line, ": ", 2) + if len(parts) != 2 { + continue + } + + if parts[0] == "gitdir" { + gitDir = strings.TrimSpace(parts[1]) + } + } + + if gitDir == "" { + return "", fmt.Errorf(`expected %q to contain a line with "gitdir: [...]"`, filepath.Join(root.Native(), GitDirectoryName)) + } + + return gitDir, nil +} + +// readGitCommonDir reads the value of the `commondir` file in the `.git` directory of a worktree. +// This file typically contains "../.." to point to $GIT_COMMON_DIR. +func readGitCommonDir(gitDir vfs.Path) (string, error) { + lines, err := readLines(gitDir, "commondir") + if err != nil { + return "", err + } + + if len(lines) == 0 { + return "", errors.New("file is empty") + } + + return strings.TrimSpace(lines[0]), nil +} + +// resolveGitDirs resolves the paths for $GIT_DIR and $GIT_COMMON_DIR. +// The path argument is the root of the checkout where (supposedly) a `.git` file or directory exists. +func resolveGitDirs(root vfs.Path) (vfs.Path, vfs.Path, error) { + fileInfo, err := root.Stat(GitDirectoryName) + if err != nil { + // If the `.git` file or directory does not exist, then this is not a git repository. 
+ // Return paths that we know don't exist, so we do not need to perform nil checks in the caller. + if errors.Is(err, fs.ErrNotExist) { + gitDir := vfs.MustNew(filepath.Join(root.Native(), GitDirectoryName)) + return gitDir, gitDir, nil + } + return nil, nil, err + } + + // If the path is a directory, then it is the main working tree. + // Both $GIT_DIR and $GIT_COMMON_DIR point to the same directory. + if fileInfo.IsDir() { + gitDir := vfs.MustNew(filepath.Join(root.Native(), GitDirectoryName)) + return gitDir, gitDir, nil + } + + // If the path is not a directory, then it is a worktree. + // Read value for $GIT_DIR. + gitDirValue, err := readGitDir(root) + if err != nil { + return nil, nil, err + } + + // Resolve $GIT_DIR. + var gitDir vfs.Path + if filepath.IsAbs(gitDirValue) { + gitDir = vfs.MustNew(gitDirValue) + } else { + gitDir = vfs.MustNew(filepath.Join(root.Native(), gitDirValue)) + } + + // Read value for $GIT_COMMON_DIR. + gitCommonDirValue, err := readGitCommonDir(gitDir) + if err != nil { + return nil, nil, fmt.Errorf(`expected "commondir" file in worktree git folder at %q: %w`, gitDir.Native(), err) + } + + // Resolve $GIT_COMMON_DIR. + var gitCommonDir vfs.Path + if filepath.IsAbs(gitCommonDirValue) { + gitCommonDir = vfs.MustNew(gitCommonDirValue) + } else { + gitCommonDir = vfs.MustNew(filepath.Join(gitDir.Native(), gitCommonDirValue)) + } + + return gitDir, gitCommonDir, nil +} diff --git a/libs/git/worktree_test.go b/libs/git/worktree_test.go new file mode 100644 index 000000000..3d620c483 --- /dev/null +++ b/libs/git/worktree_test.go @@ -0,0 +1,108 @@ +package git + +import ( + "fmt" + "os" + "path/filepath" + "testing" + + "github.com/databricks/cli/libs/vfs" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func setupWorktree(t *testing.T) string { + var err error + + tmpDir := t.TempDir() + + // Checkout path + err = os.MkdirAll(filepath.Join(tmpDir, "my_worktree"), os.ModePerm) + require.NoError(t, err) + + // Main $GIT_COMMON_DIR + err = os.MkdirAll(filepath.Join(tmpDir, ".git"), os.ModePerm) + require.NoError(t, err) + + // Worktree $GIT_DIR + err = os.MkdirAll(filepath.Join(tmpDir, ".git/worktrees/my_worktree"), os.ModePerm) + require.NoError(t, err) + + return tmpDir +} + +func writeGitDir(t *testing.T, dir, content string) { + err := os.WriteFile(filepath.Join(dir, "my_worktree/.git"), []byte(content), os.ModePerm) + require.NoError(t, err) +} + +func writeGitCommonDir(t *testing.T, dir, content string) { + err := os.WriteFile(filepath.Join(dir, ".git/worktrees/my_worktree/commondir"), []byte(content), os.ModePerm) + require.NoError(t, err) +} + +func verifyCorrectDirs(t *testing.T, dir string) { + gitDir, gitCommonDir, err := resolveGitDirs(vfs.MustNew(filepath.Join(dir, "my_worktree"))) + require.NoError(t, err) + assert.Equal(t, filepath.Join(dir, ".git/worktrees/my_worktree"), gitDir.Native()) + assert.Equal(t, filepath.Join(dir, ".git"), gitCommonDir.Native()) +} + +func TestWorktreeResolveGitDir(t *testing.T) { + dir := setupWorktree(t) + writeGitCommonDir(t, dir, "../..") + + t.Run("relative", func(t *testing.T) { + writeGitDir(t, dir, fmt.Sprintf("gitdir: %s", "../.git/worktrees/my_worktree")) + verifyCorrectDirs(t, dir) + }) + + t.Run("absolute", func(t *testing.T) { + writeGitDir(t, dir, fmt.Sprintf("gitdir: %s", filepath.Join(dir, ".git/worktrees/my_worktree"))) + verifyCorrectDirs(t, dir) + }) + + t.Run("additional spaces", func(t *testing.T) { + writeGitDir(t, dir, fmt.Sprintf("gitdir: %s \n\n\n", 
"../.git/worktrees/my_worktree")) + verifyCorrectDirs(t, dir) + }) + + t.Run("empty", func(t *testing.T) { + writeGitDir(t, dir, "") + + _, _, err := resolveGitDirs(vfs.MustNew(filepath.Join(dir, "my_worktree"))) + assert.ErrorContains(t, err, ` to contain a line with "gitdir: [...]"`) + }) +} + +func TestWorktreeResolveCommonDir(t *testing.T) { + dir := setupWorktree(t) + writeGitDir(t, dir, fmt.Sprintf("gitdir: %s", "../.git/worktrees/my_worktree")) + + t.Run("relative", func(t *testing.T) { + writeGitCommonDir(t, dir, "../..") + verifyCorrectDirs(t, dir) + }) + + t.Run("absolute", func(t *testing.T) { + writeGitCommonDir(t, dir, filepath.Join(dir, ".git")) + verifyCorrectDirs(t, dir) + }) + + t.Run("additional spaces", func(t *testing.T) { + writeGitCommonDir(t, dir, " ../.. \n\n\n") + verifyCorrectDirs(t, dir) + }) + + t.Run("empty", func(t *testing.T) { + writeGitCommonDir(t, dir, "") + + _, _, err := resolveGitDirs(vfs.MustNew(filepath.Join(dir, "my_worktree"))) + assert.ErrorContains(t, err, `expected "commondir" file in worktree git folder at `) + }) + + t.Run("missing", func(t *testing.T) { + _, _, err := resolveGitDirs(vfs.MustNew(filepath.Join(dir, "my_worktree"))) + assert.ErrorContains(t, err, `expected "commondir" file in worktree git folder at `) + }) +} From f8bb3a8d729b4cbe6313c1e313379dbe70f6b680 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 22 Oct 2024 11:37:01 +0200 Subject: [PATCH 08/13] Bump github.com/databricks/databricks-sdk-go from 0.48.0 to 0.49.0 (#1843) Bumps [github.com/databricks/databricks-sdk-go](https://github.com/databricks/databricks-sdk-go) from 0.48.0 to 0.49.0.
Release notes

Sourced from github.com/databricks/databricks-sdk-go's releases.

v0.49.0

API Changes:

OpenAPI SHA: cf9c61453990df0f9453670f2fe68e1b128647a2, Date: 2024-10-14

Changelog

Sourced from github.com/databricks/databricks-sdk-go's changelog.

[Release] Release v0.49.0

API Changes:

OpenAPI SHA: cf9c61453990df0f9453670f2fe68e1b128647a2, Date: 2024-10-14

Commits

Most Recent Ignore Conditions Applied to This Pull Request

| Dependency Name | Ignore Conditions |
| --- | --- |
| github.com/databricks/databricks-sdk-go | [>= 0.28.a, < 0.29] |
[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=github.com/databricks/databricks-sdk-go&package-manager=go_modules&previous-version=0.48.0&new-version=0.49.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
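The bump also pulls in the new `DisableLegacyDbfs` settings service, which the generated `databricks disable-legacy-dbfs` command group below wraps. A sketch of reading the setting directly with the Go SDK, using the accessor that appears in the generated code below:

```go
package main

import (
	"context"
	"fmt"

	"github.com/databricks/databricks-sdk-go"
	"github.com/databricks/databricks-sdk-go/service/settings"
)

func main() {
	ctx := context.Background()
	w := databricks.Must(databricks.NewWorkspaceClient())

	// Mirrors the generated `disable-legacy-dbfs get` command below.
	setting, err := w.Settings.DisableLegacyDbfs().Get(ctx, settings.GetDisableLegacyDbfsRequest{})
	if err != nil {
		panic(err)
	}
	fmt.Printf("disable-legacy-dbfs setting: %+v\n", setting)
}
```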
--------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Andrew Nester --- .codegen/_openapi_sha | 2 +- .gitattributes | 1 + bundle/schema/jsonschema.json | 48 +++- cmd/workspace/apps/apps.go | 3 - .../disable-legacy-dbfs.go | 220 ++++++++++++++++++ cmd/workspace/jobs/jobs.go | 1 + cmd/workspace/settings/settings.go | 2 + go.mod | 2 +- go.sum | 4 +- 9 files changed, 275 insertions(+), 8 deletions(-) create mode 100755 cmd/workspace/disable-legacy-dbfs/disable-legacy-dbfs.go diff --git a/.codegen/_openapi_sha b/.codegen/_openapi_sha index 303c78553..2d9cb6d86 100644 --- a/.codegen/_openapi_sha +++ b/.codegen/_openapi_sha @@ -1 +1 @@ -0c86ea6dbd9a730c24ff0d4e509603e476955ac5 \ No newline at end of file +cf9c61453990df0f9453670f2fe68e1b128647a2 \ No newline at end of file diff --git a/.gitattributes b/.gitattributes index 2470eb33d..ae10198bb 100755 --- a/.gitattributes +++ b/.gitattributes @@ -54,6 +54,7 @@ cmd/workspace/dashboards/dashboards.go linguist-generated=true cmd/workspace/data-sources/data-sources.go linguist-generated=true cmd/workspace/default-namespace/default-namespace.go linguist-generated=true cmd/workspace/disable-legacy-access/disable-legacy-access.go linguist-generated=true +cmd/workspace/disable-legacy-dbfs/disable-legacy-dbfs.go linguist-generated=true cmd/workspace/enhanced-security-monitoring/enhanced-security-monitoring.go linguist-generated=true cmd/workspace/experiments/experiments.go linguist-generated=true cmd/workspace/external-locations/external-locations.go linguist-generated=true diff --git a/bundle/schema/jsonschema.json b/bundle/schema/jsonschema.json index 06b9cc15a..178656fe0 100644 --- a/bundle/schema/jsonschema.json +++ b/bundle/schema/jsonschema.json @@ -209,6 +209,10 @@ { "type": "object", "properties": { + "budget_policy_id": { + "description": "The id of the user specified budget policy to use for this job.\nIf not specified, a default budget policy may be applied when creating or modifying the job.\nSee `effective_budget_policy_id` for the budget policy used by this workload.", + "$ref": "#/$defs/string" + }, "continuous": { "description": "An optional continuous property for this job. The continuous property will ensure that there is always one run executing. Only one of `schedule` and `continuous` can be used.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.Continuous" @@ -3901,6 +3905,10 @@ { "type": "object", "properties": { + "report": { + "description": "Select tables from a specific source report.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.ReportSpec" + }, "schema": { "description": "Select tables from a specific source schema.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.SchemaSpec" @@ -4233,6 +4241,40 @@ } ] }, + "pipelines.ReportSpec": { + "anyOf": [ + { + "type": "object", + "properties": { + "destination_catalog": { + "description": "Required. Destination catalog to store table.", + "$ref": "#/$defs/string" + }, + "destination_schema": { + "description": "Required. Destination schema to store table.", + "$ref": "#/$defs/string" + }, + "destination_table": { + "description": "Required. Destination table name. The pipeline fails if a table with that name already exists.", + "$ref": "#/$defs/string" + }, + "source_url": { + "description": "Required. 
Report URL in the source system.", + "$ref": "#/$defs/string" + }, + "table_configuration": { + "description": "Configuration settings to control the ingestion of tables. These settings override the table_configuration defined in the IngestionPipelineDefinition object.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.TableSpecificConfig" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, "pipelines.SchemaSpec": { "anyOf": [ { @@ -4281,7 +4323,7 @@ "$ref": "#/$defs/string" }, "destination_table": { - "description": "Optional. Destination table name. The pipeline fails If a table with that name already exists. If not set, the source table name is used.", + "description": "Optional. Destination table name. The pipeline fails if a table with that name already exists. If not set, the source table name is used.", "$ref": "#/$defs/string" }, "source_catalog": { @@ -4329,6 +4371,10 @@ "SCD_TYPE_1", "SCD_TYPE_2" ] + }, + "sequence_by": { + "description": "The column names specifying the logical order of events in the source data. Delta Live Tables uses this sequencing to handle change events that arrive out of order.", + "$ref": "#/$defs/slice/string" } }, "additionalProperties": false diff --git a/cmd/workspace/apps/apps.go b/cmd/workspace/apps/apps.go index 4cee2f82a..9331ddc2e 100755 --- a/cmd/workspace/apps/apps.go +++ b/cmd/workspace/apps/apps.go @@ -28,9 +28,6 @@ func New() *cobra.Command { Annotations: map[string]string{ "package": "apps", }, - - // This service is being previewed; hide from help output. - Hidden: true, } // Add methods diff --git a/cmd/workspace/disable-legacy-dbfs/disable-legacy-dbfs.go b/cmd/workspace/disable-legacy-dbfs/disable-legacy-dbfs.go new file mode 100755 index 000000000..d09755370 --- /dev/null +++ b/cmd/workspace/disable-legacy-dbfs/disable-legacy-dbfs.go @@ -0,0 +1,220 @@ +// Code generated from OpenAPI specs by Databricks SDK Generator. DO NOT EDIT. + +package disable_legacy_dbfs + +import ( + "fmt" + + "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/flags" + "github.com/databricks/databricks-sdk-go/service/settings" + "github.com/spf13/cobra" +) + +// Slice with functions to override default command behavior. +// Functions can be added from the `init()` function in manually curated files in this directory. +var cmdOverrides []func(*cobra.Command) + +func New() *cobra.Command { + cmd := &cobra.Command{ + Use: "disable-legacy-dbfs", + Short: `When this setting is on, access to DBFS root and DBFS mounts is disallowed (as well as creation of new mounts).`, + Long: `When this setting is on, access to DBFS root and DBFS mounts is disallowed (as + well as creation of new mounts). When the setting is off, all DBFS + functionality is enabled`, + + // This service is being previewed; hide from help output. + Hidden: true, + } + + // Add methods + cmd.AddCommand(newDelete()) + cmd.AddCommand(newGet()) + cmd.AddCommand(newUpdate()) + + // Apply optional overrides to this command. + for _, fn := range cmdOverrides { + fn(cmd) + } + + return cmd +} + +// start delete command + +// Slice with functions to override default command behavior. +// Functions can be added from the `init()` function in manually curated files in this directory. 
+var deleteOverrides []func( + *cobra.Command, + *settings.DeleteDisableLegacyDbfsRequest, +) + +func newDelete() *cobra.Command { + cmd := &cobra.Command{} + + var deleteReq settings.DeleteDisableLegacyDbfsRequest + + // TODO: short flags + + cmd.Flags().StringVar(&deleteReq.Etag, "etag", deleteReq.Etag, `etag used for versioning.`) + + cmd.Use = "delete" + cmd.Short = `Delete the disable legacy DBFS setting.` + cmd.Long = `Delete the disable legacy DBFS setting. + + Deletes the disable legacy DBFS setting for a workspace, reverting back to the + default.` + + cmd.Annotations = make(map[string]string) + + cmd.Args = func(cmd *cobra.Command, args []string) error { + check := root.ExactArgs(0) + return check(cmd, args) + } + + cmd.PreRunE = root.MustWorkspaceClient + cmd.RunE = func(cmd *cobra.Command, args []string) (err error) { + ctx := cmd.Context() + w := root.WorkspaceClient(ctx) + + response, err := w.Settings.DisableLegacyDbfs().Delete(ctx, deleteReq) + if err != nil { + return err + } + return cmdio.Render(ctx, response) + } + + // Disable completions since they are not applicable. + // Can be overridden by manual implementation in `override.go`. + cmd.ValidArgsFunction = cobra.NoFileCompletions + + // Apply optional overrides to this command. + for _, fn := range deleteOverrides { + fn(cmd, &deleteReq) + } + + return cmd +} + +// start get command + +// Slice with functions to override default command behavior. +// Functions can be added from the `init()` function in manually curated files in this directory. +var getOverrides []func( + *cobra.Command, + *settings.GetDisableLegacyDbfsRequest, +) + +func newGet() *cobra.Command { + cmd := &cobra.Command{} + + var getReq settings.GetDisableLegacyDbfsRequest + + // TODO: short flags + + cmd.Flags().StringVar(&getReq.Etag, "etag", getReq.Etag, `etag used for versioning.`) + + cmd.Use = "get" + cmd.Short = `Get the disable legacy DBFS setting.` + cmd.Long = `Get the disable legacy DBFS setting. + + Gets the disable legacy DBFS setting.` + + cmd.Annotations = make(map[string]string) + + cmd.Args = func(cmd *cobra.Command, args []string) error { + check := root.ExactArgs(0) + return check(cmd, args) + } + + cmd.PreRunE = root.MustWorkspaceClient + cmd.RunE = func(cmd *cobra.Command, args []string) (err error) { + ctx := cmd.Context() + w := root.WorkspaceClient(ctx) + + response, err := w.Settings.DisableLegacyDbfs().Get(ctx, getReq) + if err != nil { + return err + } + return cmdio.Render(ctx, response) + } + + // Disable completions since they are not applicable. + // Can be overridden by manual implementation in `override.go`. + cmd.ValidArgsFunction = cobra.NoFileCompletions + + // Apply optional overrides to this command. + for _, fn := range getOverrides { + fn(cmd, &getReq) + } + + return cmd +} + +// start update command + +// Slice with functions to override default command behavior. +// Functions can be added from the `init()` function in manually curated files in this directory. +var updateOverrides []func( + *cobra.Command, + *settings.UpdateDisableLegacyDbfsRequest, +) + +func newUpdate() *cobra.Command { + cmd := &cobra.Command{} + + var updateReq settings.UpdateDisableLegacyDbfsRequest + var updateJson flags.JsonFlag + + // TODO: short flags + cmd.Flags().Var(&updateJson, "json", `either inline JSON string or @path/to/file.json with request body`) + + cmd.Use = "update" + cmd.Short = `Update the disable legacy DBFS setting.` + cmd.Long = `Update the disable legacy DBFS setting. 
+ + Updates the disable legacy DBFS setting for the workspace.` + + cmd.Annotations = make(map[string]string) + + cmd.PreRunE = root.MustWorkspaceClient + cmd.RunE = func(cmd *cobra.Command, args []string) (err error) { + ctx := cmd.Context() + w := root.WorkspaceClient(ctx) + + if cmd.Flags().Changed("json") { + diags := updateJson.Unmarshal(&updateReq) + if diags.HasError() { + return diags.Error() + } + if len(diags) > 0 { + err := cmdio.RenderDiagnosticsToErrorOut(ctx, diags) + if err != nil { + return err + } + } + } else { + return fmt.Errorf("please provide command input in JSON format by specifying the --json flag") + } + + response, err := w.Settings.DisableLegacyDbfs().Update(ctx, updateReq) + if err != nil { + return err + } + return cmdio.Render(ctx, response) + } + + // Disable completions since they are not applicable. + // Can be overridden by manual implementation in `override.go`. + cmd.ValidArgsFunction = cobra.NoFileCompletions + + // Apply optional overrides to this command. + for _, fn := range updateOverrides { + fn(cmd, &updateReq) + } + + return cmd +} + +// end service DisableLegacyDbfs diff --git a/cmd/workspace/jobs/jobs.go b/cmd/workspace/jobs/jobs.go index d4ceb0c28..9e8db43d0 100755 --- a/cmd/workspace/jobs/jobs.go +++ b/cmd/workspace/jobs/jobs.go @@ -1557,6 +1557,7 @@ func newSubmit() *cobra.Command { cmd.Flags().Var(&submitJson, "json", `either inline JSON string or @path/to/file.json with request body`) // TODO: array: access_control_list + cmd.Flags().StringVar(&submitReq.BudgetPolicyId, "budget-policy-id", submitReq.BudgetPolicyId, `The user specified id of the budget policy to use for this one-time run.`) // TODO: complex arg: email_notifications // TODO: array: environments // TODO: complex arg: git_source diff --git a/cmd/workspace/settings/settings.go b/cmd/workspace/settings/settings.go index aaeecf41b..31e6ceee4 100755 --- a/cmd/workspace/settings/settings.go +++ b/cmd/workspace/settings/settings.go @@ -9,6 +9,7 @@ import ( compliance_security_profile "github.com/databricks/cli/cmd/workspace/compliance-security-profile" default_namespace "github.com/databricks/cli/cmd/workspace/default-namespace" disable_legacy_access "github.com/databricks/cli/cmd/workspace/disable-legacy-access" + disable_legacy_dbfs "github.com/databricks/cli/cmd/workspace/disable-legacy-dbfs" enhanced_security_monitoring "github.com/databricks/cli/cmd/workspace/enhanced-security-monitoring" restrict_workspace_admins "github.com/databricks/cli/cmd/workspace/restrict-workspace-admins" ) @@ -33,6 +34,7 @@ func New() *cobra.Command { cmd.AddCommand(compliance_security_profile.New()) cmd.AddCommand(default_namespace.New()) cmd.AddCommand(disable_legacy_access.New()) + cmd.AddCommand(disable_legacy_dbfs.New()) cmd.AddCommand(enhanced_security_monitoring.New()) cmd.AddCommand(restrict_workspace_admins.New()) diff --git a/go.mod b/go.mod index 697205f33..9059b9637 100644 --- a/go.mod +++ b/go.mod @@ -7,7 +7,7 @@ toolchain go1.22.7 require ( github.com/Masterminds/semver/v3 v3.3.0 // MIT github.com/briandowns/spinner v1.23.1 // Apache 2.0 - github.com/databricks/databricks-sdk-go v0.48.0 // Apache 2.0 + github.com/databricks/databricks-sdk-go v0.49.0 // Apache 2.0 github.com/fatih/color v1.17.0 // MIT github.com/ghodss/yaml v1.0.0 // MIT + NOTICE github.com/google/uuid v1.6.0 // BSD-3-Clause diff --git a/go.sum b/go.sum index 03698b20a..f365fcbf6 100644 --- a/go.sum +++ b/go.sum @@ -32,8 +32,8 @@ github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGX 
github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/cyphar/filepath-securejoin v0.2.4 h1:Ugdm7cg7i6ZK6x3xDF1oEu1nfkyfH53EtKeQYTC3kyg= github.com/cyphar/filepath-securejoin v0.2.4/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4= -github.com/databricks/databricks-sdk-go v0.48.0 h1:46KtsnRo+FGhC3izUXbpL0PXBNomvsdignYDhJZlm9s= -github.com/databricks/databricks-sdk-go v0.48.0/go.mod h1:ds+zbv5mlQG7nFEU5ojLtgN/u0/9YzZmKQES/CfedzU= +github.com/databricks/databricks-sdk-go v0.49.0 h1:VBTeZZMLIuBSM4kxOCfUcW9z4FUQZY2QeNRD5qm9FUQ= +github.com/databricks/databricks-sdk-go v0.49.0/go.mod h1:ds+zbv5mlQG7nFEU5ojLtgN/u0/9YzZmKQES/CfedzU= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= From 68d69d6e0bb420cdfbdceceb686717912187980e Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Tue, 22 Oct 2024 12:43:43 +0200 Subject: [PATCH 09/13] Upgrade TF provider to 1.54.0 (#1852) ## Changes Upgrade TF provider to 1.54.0 --- bundle/internal/tf/codegen/schema/version.go | 2 +- .../data_source_notification_destinations.go | 15 +++++++++ .../tf/schema/data_source_registered_model.go | 32 +++++++++++++++++++ bundle/internal/tf/schema/data_sources.go | 4 +++ bundle/internal/tf/schema/resource_job.go | 1 + .../tf/schema/resource_online_table.go | 11 ++++--- .../internal/tf/schema/resource_pipeline.go | 19 +++++++++++ bundle/internal/tf/schema/root.go | 2 +- 8 files changed, 79 insertions(+), 7 deletions(-) create mode 100644 bundle/internal/tf/schema/data_source_notification_destinations.go create mode 100644 bundle/internal/tf/schema/data_source_registered_model.go diff --git a/bundle/internal/tf/codegen/schema/version.go b/bundle/internal/tf/codegen/schema/version.go index 49e48a6e3..0c4244089 100644 --- a/bundle/internal/tf/codegen/schema/version.go +++ b/bundle/internal/tf/codegen/schema/version.go @@ -1,3 +1,3 @@ package schema -const ProviderVersion = "1.53.0" +const ProviderVersion = "1.54.0" diff --git a/bundle/internal/tf/schema/data_source_notification_destinations.go b/bundle/internal/tf/schema/data_source_notification_destinations.go new file mode 100644 index 000000000..c95ad6db9 --- /dev/null +++ b/bundle/internal/tf/schema/data_source_notification_destinations.go @@ -0,0 +1,15 @@ +// Generated from Databricks Terraform provider schema. DO NOT EDIT. + +package schema + +type DataSourceNotificationDestinationsNotificationDestinations struct { + DestinationType string `json:"destination_type,omitempty"` + DisplayName string `json:"display_name,omitempty"` + Id string `json:"id,omitempty"` +} + +type DataSourceNotificationDestinations struct { + DisplayNameContains string `json:"display_name_contains,omitempty"` + Type string `json:"type,omitempty"` + NotificationDestinations []DataSourceNotificationDestinationsNotificationDestinations `json:"notification_destinations,omitempty"` +} diff --git a/bundle/internal/tf/schema/data_source_registered_model.go b/bundle/internal/tf/schema/data_source_registered_model.go new file mode 100644 index 000000000..e19e0849a --- /dev/null +++ b/bundle/internal/tf/schema/data_source_registered_model.go @@ -0,0 +1,32 @@ +// Generated from Databricks Terraform provider schema. DO NOT EDIT. 
+ +package schema + +type DataSourceRegisteredModelModelInfoAliases struct { + AliasName string `json:"alias_name,omitempty"` + VersionNum int `json:"version_num,omitempty"` +} + +type DataSourceRegisteredModelModelInfo struct { + BrowseOnly bool `json:"browse_only,omitempty"` + CatalogName string `json:"catalog_name,omitempty"` + Comment string `json:"comment,omitempty"` + CreatedAt int `json:"created_at,omitempty"` + CreatedBy string `json:"created_by,omitempty"` + FullName string `json:"full_name,omitempty"` + MetastoreId string `json:"metastore_id,omitempty"` + Name string `json:"name,omitempty"` + Owner string `json:"owner,omitempty"` + SchemaName string `json:"schema_name,omitempty"` + StorageLocation string `json:"storage_location,omitempty"` + UpdatedAt int `json:"updated_at,omitempty"` + UpdatedBy string `json:"updated_by,omitempty"` + Aliases []DataSourceRegisteredModelModelInfoAliases `json:"aliases,omitempty"` +} + +type DataSourceRegisteredModel struct { + FullName string `json:"full_name"` + IncludeAliases bool `json:"include_aliases,omitempty"` + IncludeBrowse bool `json:"include_browse,omitempty"` + ModelInfo []DataSourceRegisteredModelModelInfo `json:"model_info,omitempty"` +} diff --git a/bundle/internal/tf/schema/data_sources.go b/bundle/internal/tf/schema/data_sources.go index 10829b994..050e0bc1d 100644 --- a/bundle/internal/tf/schema/data_sources.go +++ b/bundle/internal/tf/schema/data_sources.go @@ -36,7 +36,9 @@ type DataSources struct { NodeType map[string]any `json:"databricks_node_type,omitempty"` Notebook map[string]any `json:"databricks_notebook,omitempty"` NotebookPaths map[string]any `json:"databricks_notebook_paths,omitempty"` + NotificationDestinations map[string]any `json:"databricks_notification_destinations,omitempty"` Pipelines map[string]any `json:"databricks_pipelines,omitempty"` + RegisteredModel map[string]any `json:"databricks_registered_model,omitempty"` Schema map[string]any `json:"databricks_schema,omitempty"` Schemas map[string]any `json:"databricks_schemas,omitempty"` ServicePrincipal map[string]any `json:"databricks_service_principal,omitempty"` @@ -92,7 +94,9 @@ func NewDataSources() *DataSources { NodeType: make(map[string]any), Notebook: make(map[string]any), NotebookPaths: make(map[string]any), + NotificationDestinations: make(map[string]any), Pipelines: make(map[string]any), + RegisteredModel: make(map[string]any), Schema: make(map[string]any), Schemas: make(map[string]any), ServicePrincipal: make(map[string]any), diff --git a/bundle/internal/tf/schema/resource_job.go b/bundle/internal/tf/schema/resource_job.go index 42b648b0f..c89eafab9 100644 --- a/bundle/internal/tf/schema/resource_job.go +++ b/bundle/internal/tf/schema/resource_job.go @@ -1448,6 +1448,7 @@ type ResourceJobWebhookNotifications struct { type ResourceJob struct { AlwaysRunning bool `json:"always_running,omitempty"` + BudgetPolicyId string `json:"budget_policy_id,omitempty"` ControlRunState bool `json:"control_run_state,omitempty"` Description string `json:"description,omitempty"` EditMode string `json:"edit_mode,omitempty"` diff --git a/bundle/internal/tf/schema/resource_online_table.go b/bundle/internal/tf/schema/resource_online_table.go index de671eade..58d6f4ba5 100644 --- a/bundle/internal/tf/schema/resource_online_table.go +++ b/bundle/internal/tf/schema/resource_online_table.go @@ -19,9 +19,10 @@ type ResourceOnlineTableSpec struct { } type ResourceOnlineTable struct { - Id string `json:"id,omitempty"` - Name string `json:"name"` - Status []any 
`json:"status,omitempty"` - TableServingUrl string `json:"table_serving_url,omitempty"` - Spec *ResourceOnlineTableSpec `json:"spec,omitempty"` + Id string `json:"id,omitempty"` + Name string `json:"name"` + Status []any `json:"status,omitempty"` + TableServingUrl string `json:"table_serving_url,omitempty"` + UnityCatalogProvisioningState string `json:"unity_catalog_provisioning_state,omitempty"` + Spec *ResourceOnlineTableSpec `json:"spec,omitempty"` } diff --git a/bundle/internal/tf/schema/resource_pipeline.go b/bundle/internal/tf/schema/resource_pipeline.go index 1bed91fcb..2cb459aba 100644 --- a/bundle/internal/tf/schema/resource_pipeline.go +++ b/bundle/internal/tf/schema/resource_pipeline.go @@ -142,10 +142,26 @@ type ResourcePipelineGatewayDefinition struct { GatewayStorageSchema string `json:"gateway_storage_schema,omitempty"` } +type ResourcePipelineIngestionDefinitionObjectsReportTableConfiguration struct { + PrimaryKeys []string `json:"primary_keys,omitempty"` + SalesforceIncludeFormulaFields bool `json:"salesforce_include_formula_fields,omitempty"` + ScdType string `json:"scd_type,omitempty"` + SequenceBy []string `json:"sequence_by,omitempty"` +} + +type ResourcePipelineIngestionDefinitionObjectsReport struct { + DestinationCatalog string `json:"destination_catalog,omitempty"` + DestinationSchema string `json:"destination_schema,omitempty"` + DestinationTable string `json:"destination_table,omitempty"` + SourceUrl string `json:"source_url,omitempty"` + TableConfiguration *ResourcePipelineIngestionDefinitionObjectsReportTableConfiguration `json:"table_configuration,omitempty"` +} + type ResourcePipelineIngestionDefinitionObjectsSchemaTableConfiguration struct { PrimaryKeys []string `json:"primary_keys,omitempty"` SalesforceIncludeFormulaFields bool `json:"salesforce_include_formula_fields,omitempty"` ScdType string `json:"scd_type,omitempty"` + SequenceBy []string `json:"sequence_by,omitempty"` } type ResourcePipelineIngestionDefinitionObjectsSchema struct { @@ -160,6 +176,7 @@ type ResourcePipelineIngestionDefinitionObjectsTableTableConfiguration struct { PrimaryKeys []string `json:"primary_keys,omitempty"` SalesforceIncludeFormulaFields bool `json:"salesforce_include_formula_fields,omitempty"` ScdType string `json:"scd_type,omitempty"` + SequenceBy []string `json:"sequence_by,omitempty"` } type ResourcePipelineIngestionDefinitionObjectsTable struct { @@ -173,6 +190,7 @@ type ResourcePipelineIngestionDefinitionObjectsTable struct { } type ResourcePipelineIngestionDefinitionObjects struct { + Report *ResourcePipelineIngestionDefinitionObjectsReport `json:"report,omitempty"` Schema *ResourcePipelineIngestionDefinitionObjectsSchema `json:"schema,omitempty"` Table *ResourcePipelineIngestionDefinitionObjectsTable `json:"table,omitempty"` } @@ -181,6 +199,7 @@ type ResourcePipelineIngestionDefinitionTableConfiguration struct { PrimaryKeys []string `json:"primary_keys,omitempty"` SalesforceIncludeFormulaFields bool `json:"salesforce_include_formula_fields,omitempty"` ScdType string `json:"scd_type,omitempty"` + SequenceBy []string `json:"sequence_by,omitempty"` } type ResourcePipelineIngestionDefinition struct { diff --git a/bundle/internal/tf/schema/root.go b/bundle/internal/tf/schema/root.go index 7a0cc01f9..bf4283c9b 100644 --- a/bundle/internal/tf/schema/root.go +++ b/bundle/internal/tf/schema/root.go @@ -21,7 +21,7 @@ type Root struct { const ProviderHost = "registry.terraform.io" const ProviderSource = "databricks/databricks" -const ProviderVersion = "1.53.0" +const 
ProviderVersion = "1.54.0" func NewRoot() *Root { return &Root{ From 3bab21e72ee78f6711519762317e8218f9884afd Mon Sep 17 00:00:00 2001 From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com> Date: Tue, 22 Oct 2024 20:29:17 +0530 Subject: [PATCH 10/13] Fix race condition when restarting continuous jobs (#1849) ## Changes We don't need to cancel existing runs when the job is continuous and unpaused. The `/jobs/run-now` command will cancel the existing run and trigger a new one automatically. Cancelling the job manually can cause a race condition where both the manual trigger from the CLI and the continuous trigger from the job configuration happens at the same time. This PR prevents that from happening. ## Tests Unit tests and manually --- bundle/run/job.go | 23 +++++++ bundle/run/job_test.go | 132 ++++++++++++++++++++++++++++++++++++ bundle/run/pipeline.go | 12 ++++ bundle/run/pipeline_test.go | 70 +++++++++++++++++++ bundle/run/runner.go | 4 ++ cmd/bundle/run.go | 14 ++-- 6 files changed, 247 insertions(+), 8 deletions(-) diff --git a/bundle/run/job.go b/bundle/run/job.go index 8003c7d29..340af961c 100644 --- a/bundle/run/job.go +++ b/bundle/run/job.go @@ -317,6 +317,29 @@ func (r *jobRunner) Cancel(ctx context.Context) error { return errGroup.Wait() } +func (r *jobRunner) Restart(ctx context.Context, opts *Options) (output.RunOutput, error) { + // We don't need to cancel existing runs if the job is continuous and unpaused. + // the /jobs/run-now API will automatically cancel any existing runs before starting a new one. + // + // /jobs/run-now will not cancel existing runs if the job is continuous and paused. + // New job runs will be queued instead and will wait for existing runs to finish. + // In this case, we need to cancel the existing runs before starting a new one. 
+ continuous := r.job.JobSettings.Continuous + if continuous != nil && continuous.PauseStatus == jobs.PauseStatusUnpaused { + return r.Run(ctx, opts) + } + + s := cmdio.Spinner(ctx) + s <- "Cancelling all active job runs" + err := r.Cancel(ctx) + close(s) + if err != nil { + return nil, err + } + + return r.Run(ctx, opts) +} + func (r *jobRunner) ParseArgs(args []string, opts *Options) error { return r.posArgsHandler().ParseArgs(args, opts) } diff --git a/bundle/run/job_test.go b/bundle/run/job_test.go index be189306b..369c546aa 100644 --- a/bundle/run/job_test.go +++ b/bundle/run/job_test.go @@ -1,6 +1,7 @@ package run import ( + "bytes" "context" "testing" "time" @@ -8,6 +9,8 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/flags" "github.com/databricks/databricks-sdk-go/experimental/mocks" "github.com/databricks/databricks-sdk-go/service/jobs" "github.com/stretchr/testify/mock" @@ -126,3 +129,132 @@ func TestJobRunnerCancelWithNoActiveRuns(t *testing.T) { err := runner.Cancel(context.Background()) require.NoError(t, err) } + +func TestJobRunnerRestart(t *testing.T) { + for _, jobSettings := range []*jobs.JobSettings{ + {}, + { + Continuous: &jobs.Continuous{ + PauseStatus: jobs.PauseStatusPaused, + }, + }, + } { + job := &resources.Job{ + ID: "123", + JobSettings: jobSettings, + } + b := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "test_job": job, + }, + }, + }, + } + + runner := jobRunner{key: "test", bundle: b, job: job} + + m := mocks.NewMockWorkspaceClient(t) + b.SetWorkpaceClient(m.WorkspaceClient) + ctx := context.Background() + ctx = cmdio.InContext(ctx, cmdio.NewIO(flags.OutputText, &bytes.Buffer{}, &bytes.Buffer{}, &bytes.Buffer{}, "", "")) + ctx = cmdio.NewContext(ctx, cmdio.NewLogger(flags.ModeAppend)) + + jobApi := m.GetMockJobsAPI() + jobApi.EXPECT().ListRunsAll(mock.Anything, jobs.ListRunsRequest{ + ActiveOnly: true, + JobId: 123, + }).Return([]jobs.BaseRun{ + {RunId: 1}, + {RunId: 2}, + }, nil) + + // Mock the runner cancelling existing job runs. 
+ mockWait := &jobs.WaitGetRunJobTerminatedOrSkipped[struct{}]{ + Poll: func(time time.Duration, f func(j *jobs.Run)) (*jobs.Run, error) { + return nil, nil + }, + } + jobApi.EXPECT().CancelRun(mock.Anything, jobs.CancelRun{ + RunId: 1, + }).Return(mockWait, nil) + jobApi.EXPECT().CancelRun(mock.Anything, jobs.CancelRun{ + RunId: 2, + }).Return(mockWait, nil) + + // Mock the runner triggering a job run + mockWaitForRun := &jobs.WaitGetRunJobTerminatedOrSkipped[jobs.RunNowResponse]{ + Poll: func(d time.Duration, f func(*jobs.Run)) (*jobs.Run, error) { + return &jobs.Run{ + State: &jobs.RunState{ + ResultState: jobs.RunResultStateSuccess, + }, + }, nil + }, + } + jobApi.EXPECT().RunNow(mock.Anything, jobs.RunNow{ + JobId: 123, + }).Return(mockWaitForRun, nil) + + // Mock the runner getting the job output + jobApi.EXPECT().GetRun(mock.Anything, jobs.GetRunRequest{}).Return(&jobs.Run{}, nil) + + _, err := runner.Restart(ctx, &Options{}) + require.NoError(t, err) + } +} + +func TestJobRunnerRestartForContinuousUnpausedJobs(t *testing.T) { + job := &resources.Job{ + ID: "123", + JobSettings: &jobs.JobSettings{ + Continuous: &jobs.Continuous{ + PauseStatus: jobs.PauseStatusUnpaused, + }, + }, + } + b := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "test_job": job, + }, + }, + }, + } + + runner := jobRunner{key: "test", bundle: b, job: job} + + m := mocks.NewMockWorkspaceClient(t) + b.SetWorkpaceClient(m.WorkspaceClient) + ctx := context.Background() + ctx = cmdio.InContext(ctx, cmdio.NewIO(flags.OutputText, &bytes.Buffer{}, &bytes.Buffer{}, &bytes.Buffer{}, "", "...")) + ctx = cmdio.NewContext(ctx, cmdio.NewLogger(flags.ModeAppend)) + + jobApi := m.GetMockJobsAPI() + + // The runner should not try and cancel existing job runs for unpaused continuous jobs. 
+ jobApi.AssertNotCalled(t, "ListRunsAll") + jobApi.AssertNotCalled(t, "CancelRun") + + // Mock the runner triggering a job run + mockWaitForRun := &jobs.WaitGetRunJobTerminatedOrSkipped[jobs.RunNowResponse]{ + Poll: func(d time.Duration, f func(*jobs.Run)) (*jobs.Run, error) { + return &jobs.Run{ + State: &jobs.RunState{ + ResultState: jobs.RunResultStateSuccess, + }, + }, nil + }, + } + jobApi.EXPECT().RunNow(mock.Anything, jobs.RunNow{ + JobId: 123, + }).Return(mockWaitForRun, nil) + + // Mock the runner getting the job output + jobApi.EXPECT().GetRun(mock.Anything, jobs.GetRunRequest{}).Return(&jobs.Run{}, nil) + + _, err := runner.Restart(ctx, &Options{}) + require.NoError(t, err) +} diff --git a/bundle/run/pipeline.go b/bundle/run/pipeline.go index d684f8388..ffe012843 100644 --- a/bundle/run/pipeline.go +++ b/bundle/run/pipeline.go @@ -183,6 +183,18 @@ func (r *pipelineRunner) Cancel(ctx context.Context) error { return err } +func (r *pipelineRunner) Restart(ctx context.Context, opts *Options) (output.RunOutput, error) { + s := cmdio.Spinner(ctx) + s <- "Cancelling the active pipeline update" + err := r.Cancel(ctx) + close(s) + if err != nil { + return nil, err + } + + return r.Run(ctx, opts) +} + func (r *pipelineRunner) ParseArgs(args []string, opts *Options) error { if len(args) == 0 { return nil diff --git a/bundle/run/pipeline_test.go b/bundle/run/pipeline_test.go index 29b57ffdb..e4608061c 100644 --- a/bundle/run/pipeline_test.go +++ b/bundle/run/pipeline_test.go @@ -1,6 +1,7 @@ package run import ( + "bytes" "context" "testing" "time" @@ -8,8 +9,12 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/flags" + sdk_config "github.com/databricks/databricks-sdk-go/config" "github.com/databricks/databricks-sdk-go/experimental/mocks" "github.com/databricks/databricks-sdk-go/service/pipelines" + "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" ) @@ -47,3 +52,68 @@ func TestPipelineRunnerCancel(t *testing.T) { err := runner.Cancel(context.Background()) require.NoError(t, err) } + +func TestPipelineRunnerRestart(t *testing.T) { + pipeline := &resources.Pipeline{ + ID: "123", + } + + b := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Pipelines: map[string]*resources.Pipeline{ + "test_pipeline": pipeline, + }, + }, + }, + } + + runner := pipelineRunner{key: "test", bundle: b, pipeline: pipeline} + + m := mocks.NewMockWorkspaceClient(t) + m.WorkspaceClient.Config = &sdk_config.Config{ + Host: "https://test.com", + } + b.SetWorkpaceClient(m.WorkspaceClient) + ctx := context.Background() + ctx = cmdio.InContext(ctx, cmdio.NewIO(flags.OutputText, &bytes.Buffer{}, &bytes.Buffer{}, &bytes.Buffer{}, "", "...")) + ctx = cmdio.NewContext(ctx, cmdio.NewLogger(flags.ModeAppend)) + + mockWait := &pipelines.WaitGetPipelineIdle[struct{}]{ + Poll: func(time.Duration, func(*pipelines.GetPipelineResponse)) (*pipelines.GetPipelineResponse, error) { + return nil, nil + }, + } + + pipelineApi := m.GetMockPipelinesAPI() + pipelineApi.EXPECT().Stop(mock.Anything, pipelines.StopRequest{ + PipelineId: "123", + }).Return(mockWait, nil) + + pipelineApi.EXPECT().GetByPipelineId(mock.Anything, "123").Return(&pipelines.GetPipelineResponse{}, nil) + + // Mock runner starting a new update + pipelineApi.EXPECT().StartUpdate(mock.Anything, pipelines.StartUpdate{ + PipelineId: "123", + 
}).Return(&pipelines.StartUpdateResponse{ + UpdateId: "456", + }, nil) + + // Mock runner polling for events + pipelineApi.EXPECT().ListPipelineEventsAll(mock.Anything, pipelines.ListPipelineEventsRequest{ + Filter: `update_id = '456'`, + MaxResults: 100, + PipelineId: "123", + }).Return([]pipelines.PipelineEvent{}, nil) + + // Mock runner polling for update status + pipelineApi.EXPECT().GetUpdateByPipelineIdAndUpdateId(mock.Anything, "123", "456"). + Return(&pipelines.GetUpdateResponse{ + Update: &pipelines.UpdateInfo{ + State: pipelines.UpdateInfoStateCompleted, + }, + }, nil) + + _, err := runner.Restart(ctx, &Options{}) + require.NoError(t, err) +} diff --git a/bundle/run/runner.go b/bundle/run/runner.go index 0f202ce7d..1cdcc9d8b 100644 --- a/bundle/run/runner.go +++ b/bundle/run/runner.go @@ -27,6 +27,10 @@ type Runner interface { // Run the underlying worklow. Run(ctx context.Context, opts *Options) (output.RunOutput, error) + // Restart the underlying workflow by cancelling any existing runs before + // starting a new one. + Restart(ctx context.Context, opts *Options) (output.RunOutput, error) + // Cancel the underlying workflow. Cancel(ctx context.Context) error diff --git a/cmd/bundle/run.go b/cmd/bundle/run.go index 9ef5eb8ff..ed5bd2ef1 100644 --- a/cmd/bundle/run.go +++ b/cmd/bundle/run.go @@ -8,6 +8,7 @@ import ( "github.com/databricks/cli/bundle/deploy/terraform" "github.com/databricks/cli/bundle/phases" "github.com/databricks/cli/bundle/run" + "github.com/databricks/cli/bundle/run/output" "github.com/databricks/cli/cmd/bundle/utils" "github.com/databricks/cli/cmd/root" "github.com/databricks/cli/libs/cmdio" @@ -100,19 +101,16 @@ task or a Python wheel task, the second example applies. } runOptions.NoWait = noWait + var output output.RunOutput if restart { - s := cmdio.Spinner(ctx) - s <- "Cancelling all runs" - err := runner.Cancel(ctx) - close(s) - if err != nil { - return err - } + output, err = runner.Restart(ctx, &runOptions) + } else { + output, err = runner.Run(ctx, &runOptions) } - output, err := runner.Run(ctx, &runOptions) if err != nil { return err } + if output != nil { switch root.OutputType(cmd) { case flags.OutputText: From 60c153c0e765e6a7fd53e20f2ce431b7f3a70812 Mon Sep 17 00:00:00 2001 From: "Lennart Kats (databricks)" Date: Tue, 22 Oct 2024 17:52:46 +0200 Subject: [PATCH 11/13] Fix pipeline in default-python template not working for certain workspaces (#1854) Change the default-python template to not set the `catalog` field for the pipeline for workspaces that set `hive_metastore` as the default catalog. The Pipelines service currently returns an error when that value is used for the `catalog` field. This is the most simple fix for this issue, which was reported by a customer. As a followup, we should look at whether we want to prompt for a catalog instead, possibly just for this specific scenario. 
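For illustration, here is a small, self-contained sketch of the conditional used in the template change below. The real template resolves `default_catalog` through the templating engine's workspace helpers; this sketch models it as a plain field and simplifies the YAML fragments:

```go
package main

import (
	"fmt"
	"os"
	"text/template"
)

// Simplified stand-in for the pipeline template: comment out the catalog
// field when the workspace default catalog is empty or hive_metastore.
const pipelineTmpl = `{{- if or (eq .DefaultCatalog "") (eq .DefaultCatalog "hive_metastore")}}
# catalog: catalog_name
{{- else}}
catalog: {{.DefaultCatalog}}
{{- end}}
`

func main() {
	t := template.Must(template.New("pipeline").Parse(pipelineTmpl))
	for _, c := range []string{"", "hive_metastore", "main"} {
		fmt.Printf("default_catalog=%q:", c)
		if err := t.Execute(os.Stdout, struct{ DefaultCatalog string }{c}); err != nil {
			panic(err)
		}
		fmt.Println()
	}
}
```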
--- .../resources/{{.project_name}}.pipeline.yml.tmpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl index 50e5ad97c..1c6b8607e 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl @@ -3,7 +3,7 @@ resources: pipelines: {{.project_name}}_pipeline: name: {{.project_name}}_pipeline - {{- if eq default_catalog ""}} + {{- if or (eq default_catalog "") (eq default_catalog "hive_metastore")}} ## Specify the 'catalog' field to configure this pipeline to make use of Unity Catalog: # catalog: catalog_name {{- else}} From 55a055d0f532c8ab60bdbccc51c106fb051520d8 Mon Sep 17 00:00:00 2001 From: Ilia Babanov Date: Wed, 23 Oct 2024 13:08:12 +0200 Subject: [PATCH 12/13] Add "output" flag to the bundle sync command (#1853) ## Changes We want to use 'bundle sync' in the vscode extension before running a file as an ad-hoc job (or through the context api). Right now we use bundle deploy in these cases, but deploying bundle resources is not always expected when you just want to quickly run a file. Sync makes more sense in these cases, but we still want to have verbose output to see what's happening. In the 'deploy' command we have hidden 'verbose' flag. For the sync I've just added 'output' flag, handling both json and text cases, similar to how it's done in the non-bundle `sync` command. The flag is not hidden (although we still don't show any output by default, if the flag is not set). 
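The flag works by installing an `OutputHandler` on the sync options, as the diff below shows. A hypothetical sketch of a custom handler that plugs into the same hook — only the handler signature and the `sync.Event` channel come from this change; the counting logic is illustrative:

```go
// Package name is hypothetical; this is not part of the change below.
package synchandlers

import (
	"context"

	"github.com/databricks/cli/libs/log"
	"github.com/databricks/cli/libs/sync"
)

// CountingHandler drains the event channel (so the sync loop never blocks)
// and logs how many events were emitted instead of rendering each one.
func CountingHandler(ctx context.Context, events <-chan sync.Event) {
	n := 0
	for range events {
		n++
	}
	log.Infof(ctx, "sync emitted %d events", n)
}

// Wiring it up mirrors the assignment in the diff:
//
//	opts.OutputHandler = CountingHandler
```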
VSCode Extension PR: https://github.com/databricks/databricks-vscode/pull/1401 ## Tests Manually --- cmd/bundle/sync.go | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/cmd/bundle/sync.go b/cmd/bundle/sync.go index df3e087c2..274bba0e0 100644 --- a/cmd/bundle/sync.go +++ b/cmd/bundle/sync.go @@ -1,7 +1,9 @@ package bundle import ( + "context" "fmt" + "io" "time" "github.com/databricks/cli/bundle" @@ -9,6 +11,7 @@ import ( "github.com/databricks/cli/bundle/phases" "github.com/databricks/cli/cmd/bundle/utils" "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/libs/flags" "github.com/databricks/cli/libs/log" "github.com/databricks/cli/libs/sync" "github.com/spf13/cobra" @@ -18,6 +21,7 @@ type syncFlags struct { interval time.Duration full bool watch bool + output flags.Output } func (f *syncFlags) syncOptionsFromBundle(cmd *cobra.Command, b *bundle.Bundle) (*sync.SyncOptions, error) { @@ -26,6 +30,21 @@ func (f *syncFlags) syncOptionsFromBundle(cmd *cobra.Command, b *bundle.Bundle) return nil, fmt.Errorf("cannot get sync options: %w", err) } + if f.output != "" { + var outputFunc func(context.Context, <-chan sync.Event, io.Writer) + switch f.output { + case flags.OutputText: + outputFunc = sync.TextOutput + case flags.OutputJSON: + outputFunc = sync.JsonOutput + } + if outputFunc != nil { + opts.OutputHandler = func(ctx context.Context, c <-chan sync.Event) { + outputFunc(ctx, c, cmd.OutOrStdout()) + } + } + } + opts.Full = f.full opts.PollInterval = f.interval return opts, nil @@ -42,6 +61,7 @@ func newSyncCommand() *cobra.Command { cmd.Flags().DurationVar(&f.interval, "interval", 1*time.Second, "file system polling interval (for --watch)") cmd.Flags().BoolVar(&f.full, "full", false, "perform full synchronization (default is incremental)") cmd.Flags().BoolVar(&f.watch, "watch", false, "watch local file system for changes") + cmd.Flags().Var(&f.output, "output", "type of the output format") cmd.RunE = func(cmd *cobra.Command, args []string) error { ctx := cmd.Context() @@ -65,6 +85,7 @@ func newSyncCommand() *cobra.Command { if err != nil { return err } + defer s.Close() log.Infof(ctx, "Remote file sync location: %v", opts.RemotePath) From ab622e65bbf8d90dcc8f363a978f871bb04554af Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Wed, 23 Oct 2024 16:08:27 +0200 Subject: [PATCH 13/13] [Release] Release v0.231.0 (#1856) CLI: * Added JSON input validation for CLI commands ([#1771](https://github.com/databricks/cli/pull/1771)). * Support Git worktrees for `sync` ([#1831](https://github.com/databricks/cli/pull/1831)). Bundles: * Add `bundle summary` to display URLs for deployed resources ([#1731](https://github.com/databricks/cli/pull/1731)). * Added a warning when incorrect permissions used for `/Workspace/Shared` bundle root ([#1821](https://github.com/databricks/cli/pull/1821)). * Show actionable errors for collaborative deployment scenarios ([#1386](https://github.com/databricks/cli/pull/1386)). * Fix path to repository-wide exclude file ([#1837](https://github.com/databricks/cli/pull/1837)). * Fixed typo in converting cluster permissions ([#1826](https://github.com/databricks/cli/pull/1826)). * Ignore metastore permission error during template generation ([#1819](https://github.com/databricks/cli/pull/1819)). * Handle normalization of `dyn.KindTime` into an any type ([#1836](https://github.com/databricks/cli/pull/1836)). * Added support for pip options in environment dependencies ([#1842](https://github.com/databricks/cli/pull/1842)). 
* Fix race condition when restarting continuous jobs ([#1849](https://github.com/databricks/cli/pull/1849)). * Fix pipeline in default-python template not working for certain workspaces ([#1854](https://github.com/databricks/cli/pull/1854)). * Add "output" flag to the bundle sync command ([#1853](https://github.com/databricks/cli/pull/1853)). Internal: * Move utility functions dealing with IAM to libs/iamutil ([#1820](https://github.com/databricks/cli/pull/1820)). * Remove unused `IS_OWNER` constant ([#1823](https://github.com/databricks/cli/pull/1823)). * Assert SDK version is consistent in the CLI generation process ([#1814](https://github.com/databricks/cli/pull/1814)). * Fixed unmarshalling json input into `interface{}` type ([#1832](https://github.com/databricks/cli/pull/1832)). * Fix `TestAccFsMkdirWhenFileExistsAtPath` in isolated Azure environments ([#1833](https://github.com/databricks/cli/pull/1833)). * Add behavioral tests for examples from the YAML spec ([#1835](https://github.com/databricks/cli/pull/1835)). * Remove Terraform conversion function that's no longer used ([#1840](https://github.com/databricks/cli/pull/1840)). * Encode assumptions about the dashboards API in a test ([#1839](https://github.com/databricks/cli/pull/1839)). * Add script to make testing of code on branches easier ([#1844](https://github.com/databricks/cli/pull/1844)). API Changes: * Added `databricks disable-legacy-dbfs` command group. OpenAPI commit cf9c61453990df0f9453670f2fe68e1b128647a2 (2024-10-14) Dependency updates: * Upgrade TF provider to 1.54.0 ([#1852](https://github.com/databricks/cli/pull/1852)). * Bump github.com/databricks/databricks-sdk-go from 0.48.0 to 0.49.0 ([#1843](https://github.com/databricks/cli/pull/1843)). --- CHANGELOG.md | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f31bb10b2..863474934 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,43 @@ # Version changelog +## [Release] Release v0.231.0 + +CLI: + * Added JSON input validation for CLI commands ([#1771](https://github.com/databricks/cli/pull/1771)). + * Support Git worktrees for `sync` ([#1831](https://github.com/databricks/cli/pull/1831)). + +Bundles: + * Add `bundle summary` to display URLs for deployed resources ([#1731](https://github.com/databricks/cli/pull/1731)). + * Added a warning when incorrect permissions used for `/Workspace/Shared` bundle root ([#1821](https://github.com/databricks/cli/pull/1821)). + * Show actionable errors for collaborative deployment scenarios ([#1386](https://github.com/databricks/cli/pull/1386)). + * Fix path to repository-wide exclude file ([#1837](https://github.com/databricks/cli/pull/1837)). + * Fixed typo in converting cluster permissions ([#1826](https://github.com/databricks/cli/pull/1826)). + * Ignore metastore permission error during template generation ([#1819](https://github.com/databricks/cli/pull/1819)). + * Handle normalization of `dyn.KindTime` into an any type ([#1836](https://github.com/databricks/cli/pull/1836)). + * Added support for pip options in environment dependencies ([#1842](https://github.com/databricks/cli/pull/1842)). + * Fix race condition when restarting continuous jobs ([#1849](https://github.com/databricks/cli/pull/1849)). + * Fix pipeline in default-python template not working for certain workspaces ([#1854](https://github.com/databricks/cli/pull/1854)). + * Add "output" flag to the bundle sync command ([#1853](https://github.com/databricks/cli/pull/1853)). 
+ +Internal: + * Move utility functions dealing with IAM to libs/iamutil ([#1820](https://github.com/databricks/cli/pull/1820)). + * Remove unused `IS_OWNER` constant ([#1823](https://github.com/databricks/cli/pull/1823)). + * Assert SDK version is consistent in the CLI generation process ([#1814](https://github.com/databricks/cli/pull/1814)). + * Fixed unmarshalling json input into `interface{}` type ([#1832](https://github.com/databricks/cli/pull/1832)). + * Fix `TestAccFsMkdirWhenFileExistsAtPath` in isolated Azure environments ([#1833](https://github.com/databricks/cli/pull/1833)). + * Add behavioral tests for examples from the YAML spec ([#1835](https://github.com/databricks/cli/pull/1835)). + * Remove Terraform conversion function that's no longer used ([#1840](https://github.com/databricks/cli/pull/1840)). + * Encode assumptions about the dashboards API in a test ([#1839](https://github.com/databricks/cli/pull/1839)). + * Add script to make testing of code on branches easier ([#1844](https://github.com/databricks/cli/pull/1844)). + +API Changes: + * Added `databricks disable-legacy-dbfs` command group. + +OpenAPI commit cf9c61453990df0f9453670f2fe68e1b128647a2 (2024-10-14) +Dependency updates: + * Upgrade TF provider to 1.54.0 ([#1852](https://github.com/databricks/cli/pull/1852)). + * Bump github.com/databricks/databricks-sdk-go from 0.48.0 to 0.49.0 ([#1843](https://github.com/databricks/cli/pull/1843)). + ## [Release] Release v0.230.0 Notable changes for Databricks Asset Bundles: