diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index 6f14fe88..3209ae93 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -26,10 +26,9 @@ jobs: run: git fetch --prune --unshallow - name: Setup Go - uses: actions/setup-go@v3 + uses: actions/setup-go@v4 with: go-version: 1.21.0 - cache: true - name: Set go env run: | @@ -54,9 +53,9 @@ jobs: uses: actions/checkout@v3 - name: Setup Go - uses: actions/setup-go@v3 + uses: actions/setup-go@v4 with: - go-version: 1.21 + go-version: 1.21.0 # No need to download cached dependencies when running gofmt. cache: false diff --git a/.github/workflows/release-snapshot.yml b/.github/workflows/release-snapshot.yml index 130d49dd..fbf5421b 100644 --- a/.github/workflows/release-snapshot.yml +++ b/.github/workflows/release-snapshot.yml @@ -19,27 +19,10 @@ jobs: run: git fetch --prune --unshallow - name: Setup Go - id: go - uses: actions/setup-go@v3 + uses: actions/setup-go@v4 with: go-version: 1.21.0 - - name: Locate cache paths - id: cache - run: | - echo "GOMODCACHE=$(go env GOMODCACHE)" >> $GITHUB_OUTPUT - echo "GOCACHE=$(go env GOCACHE)" >> $GITHUB_OUTPUT - - # Note: use custom caching because below performs a cross platform build - # through goreleaser and don't want to share a cache with the test builds. - - name: Setup caching - uses: actions/cache@v3 - with: - path: | - ${{ steps.cache.outputs.GOMODCACHE }} - ${{ steps.cache.outputs.GOCACHE }} - key: release-${{ runner.os }}-go-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', '.goreleaser.yaml') }} - - name: Hide snapshot tag to outsmart GoReleaser run: git tag -d snapshot || true diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 5992dcb4..c166fc5b 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -18,27 +18,10 @@ jobs: run: git fetch --prune --unshallow - name: Setup Go - id: go - uses: actions/setup-go@v3 + uses: actions/setup-go@v4 with: go-version: 1.21.0 - - name: Locate cache paths - id: cache - run: | - echo "GOMODCACHE=$(go env GOMODCACHE)" >> $GITHUB_OUTPUT - echo "GOCACHE=$(go env GOCACHE)" >> $GITHUB_OUTPUT - - # Note: use custom caching because below performs a cross platform build - # through goreleaser and don't want to share a cache with the test builds. - - name: Setup caching - uses: actions/cache@v3 - with: - path: | - ${{ steps.cache.outputs.GOMODCACHE }} - ${{ steps.cache.outputs.GOCACHE }} - key: release-${{ runner.os }}-go-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', '.goreleaser.yaml') }} - - name: Run GoReleaser uses: goreleaser/goreleaser-action@v4 with: diff --git a/CHANGELOG.md b/CHANGELOG.md index fa0dec13..6fcbab8c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,23 @@ # Version changelog +## 0.203.3 + +Bundles: + * Support cluster overrides with cluster_key and compute_key ([#696](https://github.com/databricks/cli/pull/696)). + * Allow referencing local Python wheels without artifacts section defined ([#703](https://github.com/databricks/cli/pull/703)). + * Fixed --environment flag ([#705](https://github.com/databricks/cli/pull/705)). + * Correctly identify local paths in libraries section ([#702](https://github.com/databricks/cli/pull/702)). + * Fixed path joining in FindFilesWithSuffixInPath ([#704](https://github.com/databricks/cli/pull/704)). + * Added transformation mutator for Python wheel task for them to work on DBR <13.1 ([#635](https://github.com/databricks/cli/pull/635)). + +Internal: + * Add a foundation for built-in templates ([#685](https://github.com/databricks/cli/pull/685)). + * Test transform when no Python wheel tasks defined ([#714](https://github.com/databricks/cli/pull/714)). + * Pin Terraform binary version to 1.5.5 ([#715](https://github.com/databricks/cli/pull/715)). + * Cleanup after "Add a foundation for built-in templates" ([#707](https://github.com/databricks/cli/pull/707)). + * Filter down to Python wheel tasks only for trampoline ([#712](https://github.com/databricks/cli/pull/712)). + * Update Terraform provider schema structs from 1.23.0 ([#713](https://github.com/databricks/cli/pull/713)). + ## 0.203.2 CLI: diff --git a/bundle/config/mutator/process_target_mode.go b/bundle/config/mutator/process_target_mode.go index 3a00d42f..be93512b 100644 --- a/bundle/config/mutator/process_target_mode.go +++ b/bundle/config/mutator/process_target_mode.go @@ -160,7 +160,10 @@ func (m *processTargetMode) Apply(ctx context.Context, b *bundle.Bundle) error { } return transformDevelopmentMode(b) case config.Production: - isPrincipal := auth.IsServicePrincipal(ctx, b.WorkspaceClient(), b.Config.Workspace.CurrentUser.Id) + isPrincipal, err := auth.IsServicePrincipal(ctx, b.WorkspaceClient(), b.Config.Workspace.CurrentUser.Id) + if err != nil { + return err + } return validateProductionMode(ctx, b, isPrincipal) case "": // No action diff --git a/bundle/config/mutator/trampoline.go b/bundle/config/mutator/trampoline.go index 6265c28d..7c06c7fa 100644 --- a/bundle/config/mutator/trampoline.go +++ b/bundle/config/mutator/trampoline.go @@ -12,26 +12,28 @@ import ( "github.com/databricks/databricks-sdk-go/service/jobs" ) -type fnTemplateData func(task *jobs.Task) (map[string]any, error) -type fnCleanUp func(task *jobs.Task) -type fnTasks func(b *bundle.Bundle) []*jobs.Task +type TaskWithJobKey struct { + Task *jobs.Task + JobKey string +} +type TrampolineFunctions interface { + GetTemplateData(task *jobs.Task) (map[string]any, error) + GetTasks(b *bundle.Bundle) []TaskWithJobKey + CleanUp(task *jobs.Task) error +} type trampoline struct { - name string - getTasks fnTasks - templateData fnTemplateData - cleanUp fnCleanUp - template string + name string + functions TrampolineFunctions + template string } func NewTrampoline( name string, - tasks fnTasks, - templateData fnTemplateData, - cleanUp fnCleanUp, + functions TrampolineFunctions, template string, ) *trampoline { - return &trampoline{name, tasks, templateData, cleanUp, template} + return &trampoline{name, functions, template} } func (m *trampoline) Name() string { @@ -39,7 +41,7 @@ func (m *trampoline) Name() string { } func (m *trampoline) Apply(ctx context.Context, b *bundle.Bundle) error { - tasks := m.getTasks(b) + tasks := m.functions.GetTasks(b) for _, task := range tasks { err := m.generateNotebookWrapper(b, task) if err != nil { @@ -49,13 +51,13 @@ func (m *trampoline) Apply(ctx context.Context, b *bundle.Bundle) error { return nil } -func (m *trampoline) generateNotebookWrapper(b *bundle.Bundle, task *jobs.Task) error { +func (m *trampoline) generateNotebookWrapper(b *bundle.Bundle, task TaskWithJobKey) error { internalDir, err := b.InternalDir() if err != nil { return err } - notebookName := fmt.Sprintf("notebook_%s", task.TaskKey) + notebookName := fmt.Sprintf("notebook_%s_%s", task.JobKey, task.Task.TaskKey) localNotebookPath := filepath.Join(internalDir, notebookName+".py") err = os.MkdirAll(filepath.Dir(localNotebookPath), 0755) @@ -69,7 +71,7 @@ func (m *trampoline) generateNotebookWrapper(b *bundle.Bundle, task *jobs.Task) } defer f.Close() - data, err := m.templateData(task) + data, err := m.functions.GetTemplateData(task.Task) if err != nil { return err } @@ -84,10 +86,13 @@ func (m *trampoline) generateNotebookWrapper(b *bundle.Bundle, task *jobs.Task) return err } - m.cleanUp(task) + err = m.functions.CleanUp(task.Task) + if err != nil { + return err + } remotePath := path.Join(b.Config.Workspace.FilesPath, filepath.ToSlash(internalDirRel), notebookName) - task.NotebookTask = &jobs.NotebookTask{ + task.Task.NotebookTask = &jobs.NotebookTask{ NotebookPath: remotePath, } diff --git a/bundle/config/mutator/trampoline_test.go b/bundle/config/mutator/trampoline_test.go index 9c176ff0..e523250e 100644 --- a/bundle/config/mutator/trampoline_test.go +++ b/bundle/config/mutator/trampoline_test.go @@ -14,16 +14,21 @@ import ( "github.com/stretchr/testify/require" ) -func getTasks(b *bundle.Bundle) []*jobs.Task { - tasks := make([]*jobs.Task, 0) +type functions struct{} + +func (f *functions) GetTasks(b *bundle.Bundle) []TaskWithJobKey { + tasks := make([]TaskWithJobKey, 0) for k := range b.Config.Resources.Jobs["test"].Tasks { - tasks = append(tasks, &b.Config.Resources.Jobs["test"].Tasks[k]) + tasks = append(tasks, TaskWithJobKey{ + JobKey: "test", + Task: &b.Config.Resources.Jobs["test"].Tasks[k], + }) } return tasks } -func templateData(task *jobs.Task) (map[string]any, error) { +func (f *functions) GetTemplateData(task *jobs.Task) (map[string]any, error) { if task.PythonWheelTask == nil { return nil, fmt.Errorf("PythonWheelTask cannot be nil") } @@ -33,8 +38,9 @@ func templateData(task *jobs.Task) (map[string]any, error) { return data, nil } -func cleanUp(task *jobs.Task) { +func (f *functions) CleanUp(task *jobs.Task) error { task.PythonWheelTask = nil + return nil } func TestGenerateTrampoline(t *testing.T) { @@ -71,13 +77,14 @@ func TestGenerateTrampoline(t *testing.T) { } ctx := context.Background() - trampoline := NewTrampoline("test_trampoline", getTasks, templateData, cleanUp, "Hello from {{.MyName}}") + funcs := functions{} + trampoline := NewTrampoline("test_trampoline", &funcs, "Hello from {{.MyName}}") err := bundle.Apply(ctx, b, trampoline) require.NoError(t, err) dir, err := b.InternalDir() require.NoError(t, err) - filename := filepath.Join(dir, "notebook_to_trampoline.py") + filename := filepath.Join(dir, "notebook_test_to_trampoline.py") bytes, err := os.ReadFile(filename) require.NoError(t, err) @@ -85,6 +92,6 @@ func TestGenerateTrampoline(t *testing.T) { require.Equal(t, "Hello from Trampoline", string(bytes)) task := b.Config.Resources.Jobs["test"].Tasks[0] - require.Equal(t, task.NotebookTask.NotebookPath, ".databricks/bundle/development/.internal/notebook_to_trampoline") + require.Equal(t, task.NotebookTask.NotebookPath, ".databricks/bundle/development/.internal/notebook_test_to_trampoline") require.Nil(t, task.PythonWheelTask) } diff --git a/bundle/deploy/terraform/init.go b/bundle/deploy/terraform/init.go index 924c1f09..6df7b8d4 100644 --- a/bundle/deploy/terraform/init.go +++ b/bundle/deploy/terraform/init.go @@ -55,10 +55,10 @@ func (m *initialize) findExecPath(ctx context.Context, b *bundle.Bundle, tf *con } // Download Terraform to private bin directory. - installer := &releases.LatestVersion{ - Product: product.Terraform, - Constraints: version.MustConstraints(version.NewConstraint("<=1.5.5")), - InstallDir: binDir, + installer := &releases.ExactVersion{ + Product: product.Terraform, + Version: version.Must(version.NewVersion("1.5.5")), + InstallDir: binDir, } execPath, err = installer.Install(ctx) if err != nil { diff --git a/bundle/internal/tf/codegen/generator/generator.go b/bundle/internal/tf/codegen/generator/generator.go index 2bd78d96..86d76243 100644 --- a/bundle/internal/tf/codegen/generator/generator.go +++ b/bundle/internal/tf/codegen/generator/generator.go @@ -8,6 +8,7 @@ import ( "strings" "text/template" + schemapkg "github.com/databricks/cli/bundle/internal/tf/codegen/schema" tfjson "github.com/hashicorp/terraform-json" ) @@ -32,6 +33,23 @@ func (c *collection) Generate(path string) error { return tmpl.Execute(f, c) } +type root struct { + OutputFile string + ProviderVersion string +} + +func (r *root) Generate(path string) error { + tmpl := template.Must(template.ParseFiles(fmt.Sprintf("./templates/%s.tmpl", r.OutputFile))) + f, err := os.Create(filepath.Join(path, r.OutputFile)) + if err != nil { + return err + } + + defer f.Close() + + return tmpl.Execute(f, r) +} + func Run(ctx context.Context, schema *tfjson.ProviderSchema, path string) error { // Generate types for resources. var resources []*namedBlock @@ -105,5 +123,17 @@ func Run(ctx context.Context, schema *tfjson.ProviderSchema, path string) error } } + // Generate root.go + { + r := &root{ + OutputFile: "root.go", + ProviderVersion: schemapkg.ProviderVersion, + } + err := r.Generate(path) + if err != nil { + return err + } + } + return nil } diff --git a/bundle/internal/tf/codegen/schema/generate.go b/bundle/internal/tf/codegen/schema/generate.go index 4d3e2832..de2d2722 100644 --- a/bundle/internal/tf/codegen/schema/generate.go +++ b/bundle/internal/tf/codegen/schema/generate.go @@ -8,6 +8,7 @@ import ( "os" "path/filepath" + "github.com/hashicorp/go-version" "github.com/hashicorp/hc-install/product" "github.com/hashicorp/hc-install/releases" "github.com/hashicorp/terraform-exec/tfexec" @@ -19,7 +20,7 @@ func (s *Schema) writeTerraformBlock(_ context.Context) error { "required_providers": map[string]interface{}{ "databricks": map[string]interface{}{ "source": "databricks/databricks", - "version": ">= 1.0.0", + "version": ProviderVersion, }, }, }, @@ -40,9 +41,10 @@ func (s *Schema) installTerraform(ctx context.Context) (path string, err error) return } - installer := &releases.LatestVersion{ - InstallDir: installDir, + installer := &releases.ExactVersion{ Product: product.Terraform, + Version: version.Must(version.NewVersion("1.5.5")), + InstallDir: installDir, } installer.SetLogger(log.Default()) diff --git a/bundle/internal/tf/codegen/schema/version.go b/bundle/internal/tf/codegen/schema/version.go new file mode 100644 index 00000000..84456731 --- /dev/null +++ b/bundle/internal/tf/codegen/schema/version.go @@ -0,0 +1,3 @@ +package schema + +const ProviderVersion = "1.23.0" diff --git a/bundle/internal/tf/codegen/templates/root.go.tmpl b/bundle/internal/tf/codegen/templates/root.go.tmpl new file mode 100644 index 00000000..3beb3007 --- /dev/null +++ b/bundle/internal/tf/codegen/templates/root.go.tmpl @@ -0,0 +1,32 @@ +package schema + +type Providers struct { + Databricks *Config `json:"databricks,omitempty"` +} + +func NewProviders() *Providers { + return &Providers{ + Databricks: &Config{}, + } +} + +type Root struct { + Terraform map[string]any `json:"terraform"` + + Provider *Providers `json:"provider,omitempty"` + Data *DataSources `json:"data,omitempty"` + Resource *Resources `json:"resource,omitempty"` +} + +func NewRoot() *Root { + return &Root{ + Terraform: map[string]interface{}{ + "required_providers": map[string]interface{}{ + "databricks": map[string]interface{}{ + "source": "databricks/databricks", + "version": "1.23.0", + }, + }, + }, + } +} diff --git a/bundle/internal/tf/schema/data_source_cluster.go b/bundle/internal/tf/schema/data_source_cluster.go index b5017402..2aa6fb5d 100644 --- a/bundle/internal/tf/schema/data_source_cluster.go +++ b/bundle/internal/tf/schema/data_source_cluster.go @@ -90,6 +90,7 @@ type DataSourceClusterClusterInfoGcpAttributes struct { Availability string `json:"availability,omitempty"` BootDiskSize int `json:"boot_disk_size,omitempty"` GoogleServiceAccount string `json:"google_service_account,omitempty"` + LocalSsdCount int `json:"local_ssd_count,omitempty"` UsePreemptibleExecutors bool `json:"use_preemptible_executors,omitempty"` ZoneId string `json:"zone_id,omitempty"` } diff --git a/bundle/internal/tf/schema/data_source_instance_pool.go b/bundle/internal/tf/schema/data_source_instance_pool.go index 49824717..240083d6 100644 --- a/bundle/internal/tf/schema/data_source_instance_pool.go +++ b/bundle/internal/tf/schema/data_source_instance_pool.go @@ -26,6 +26,7 @@ type DataSourceInstancePoolPoolInfoDiskSpec struct { type DataSourceInstancePoolPoolInfoGcpAttributes struct { GcpAvailability string `json:"gcp_availability,omitempty"` + LocalSsdCount int `json:"local_ssd_count,omitempty"` } type DataSourceInstancePoolPoolInfoInstancePoolFleetAttributesFleetOnDemandOption struct { diff --git a/bundle/internal/tf/schema/data_source_job.go b/bundle/internal/tf/schema/data_source_job.go index 6d2d1aa9..d251dfe5 100644 --- a/bundle/internal/tf/schema/data_source_job.go +++ b/bundle/internal/tf/schema/data_source_job.go @@ -124,6 +124,7 @@ type DataSourceJobJobSettingsSettingsJobClusterNewClusterGcpAttributes struct { Availability string `json:"availability,omitempty"` BootDiskSize int `json:"boot_disk_size,omitempty"` GoogleServiceAccount string `json:"google_service_account,omitempty"` + LocalSsdCount int `json:"local_ssd_count,omitempty"` UsePreemptibleExecutors bool `json:"use_preemptible_executors,omitempty"` ZoneId string `json:"zone_id,omitempty"` } @@ -305,6 +306,7 @@ type DataSourceJobJobSettingsSettingsNewClusterGcpAttributes struct { Availability string `json:"availability,omitempty"` BootDiskSize int `json:"boot_disk_size,omitempty"` GoogleServiceAccount string `json:"google_service_account,omitempty"` + LocalSsdCount int `json:"local_ssd_count,omitempty"` UsePreemptibleExecutors bool `json:"use_preemptible_executors,omitempty"` ZoneId string `json:"zone_id,omitempty"` } @@ -401,6 +403,11 @@ type DataSourceJobJobSettingsSettingsNotificationSettings struct { NoAlertForSkippedRuns bool `json:"no_alert_for_skipped_runs,omitempty"` } +type DataSourceJobJobSettingsSettingsParameter struct { + Default string `json:"default,omitempty"` + Name string `json:"name,omitempty"` +} + type DataSourceJobJobSettingsSettingsPipelineTask struct { FullRefresh bool `json:"full_refresh,omitempty"` PipelineId string `json:"pipeline_id"` @@ -421,6 +428,11 @@ type DataSourceJobJobSettingsSettingsRunAs struct { UserName string `json:"user_name,omitempty"` } +type DataSourceJobJobSettingsSettingsRunJobTask struct { + JobId string `json:"job_id"` + JobParameters map[string]string `json:"job_parameters,omitempty"` +} + type DataSourceJobJobSettingsSettingsSchedule struct { PauseStatus string `json:"pause_status,omitempty"` QuartzCronExpression string `json:"quartz_cron_expression"` @@ -573,6 +585,7 @@ type DataSourceJobJobSettingsSettingsTaskNewClusterGcpAttributes struct { Availability string `json:"availability,omitempty"` BootDiskSize int `json:"boot_disk_size,omitempty"` GoogleServiceAccount string `json:"google_service_account,omitempty"` + LocalSsdCount int `json:"local_ssd_count,omitempty"` UsePreemptibleExecutors bool `json:"use_preemptible_executors,omitempty"` ZoneId string `json:"zone_id,omitempty"` } @@ -682,6 +695,11 @@ type DataSourceJobJobSettingsSettingsTaskPythonWheelTask struct { Parameters []string `json:"parameters,omitempty"` } +type DataSourceJobJobSettingsSettingsTaskRunJobTask struct { + JobId string `json:"job_id"` + JobParameters map[string]string `json:"job_parameters,omitempty"` +} + type DataSourceJobJobSettingsSettingsTaskSparkJarTask struct { JarUri string `json:"jar_uri,omitempty"` MainClassName string `json:"main_class_name,omitempty"` @@ -760,6 +778,7 @@ type DataSourceJobJobSettingsSettingsTask struct { NotificationSettings *DataSourceJobJobSettingsSettingsTaskNotificationSettings `json:"notification_settings,omitempty"` PipelineTask *DataSourceJobJobSettingsSettingsTaskPipelineTask `json:"pipeline_task,omitempty"` PythonWheelTask *DataSourceJobJobSettingsSettingsTaskPythonWheelTask `json:"python_wheel_task,omitempty"` + RunJobTask *DataSourceJobJobSettingsSettingsTaskRunJobTask `json:"run_job_task,omitempty"` SparkJarTask *DataSourceJobJobSettingsSettingsTaskSparkJarTask `json:"spark_jar_task,omitempty"` SparkPythonTask *DataSourceJobJobSettingsSettingsTaskSparkPythonTask `json:"spark_python_task,omitempty"` SparkSubmitTask *DataSourceJobJobSettingsSettingsTaskSparkSubmitTask `json:"spark_submit_task,omitempty"` @@ -821,10 +840,12 @@ type DataSourceJobJobSettingsSettings struct { NewCluster *DataSourceJobJobSettingsSettingsNewCluster `json:"new_cluster,omitempty"` NotebookTask *DataSourceJobJobSettingsSettingsNotebookTask `json:"notebook_task,omitempty"` NotificationSettings *DataSourceJobJobSettingsSettingsNotificationSettings `json:"notification_settings,omitempty"` + Parameter []DataSourceJobJobSettingsSettingsParameter `json:"parameter,omitempty"` PipelineTask *DataSourceJobJobSettingsSettingsPipelineTask `json:"pipeline_task,omitempty"` PythonWheelTask *DataSourceJobJobSettingsSettingsPythonWheelTask `json:"python_wheel_task,omitempty"` Queue *DataSourceJobJobSettingsSettingsQueue `json:"queue,omitempty"` RunAs *DataSourceJobJobSettingsSettingsRunAs `json:"run_as,omitempty"` + RunJobTask *DataSourceJobJobSettingsSettingsRunJobTask `json:"run_job_task,omitempty"` Schedule *DataSourceJobJobSettingsSettingsSchedule `json:"schedule,omitempty"` SparkJarTask *DataSourceJobJobSettingsSettingsSparkJarTask `json:"spark_jar_task,omitempty"` SparkPythonTask *DataSourceJobJobSettingsSettingsSparkPythonTask `json:"spark_python_task,omitempty"` diff --git a/bundle/internal/tf/schema/resource_cluster.go b/bundle/internal/tf/schema/resource_cluster.go index a95b8c13..bb4e3582 100644 --- a/bundle/internal/tf/schema/resource_cluster.go +++ b/bundle/internal/tf/schema/resource_cluster.go @@ -68,6 +68,7 @@ type ResourceClusterGcpAttributes struct { Availability string `json:"availability,omitempty"` BootDiskSize int `json:"boot_disk_size,omitempty"` GoogleServiceAccount string `json:"google_service_account,omitempty"` + LocalSsdCount int `json:"local_ssd_count,omitempty"` UsePreemptibleExecutors bool `json:"use_preemptible_executors,omitempty"` ZoneId string `json:"zone_id,omitempty"` } diff --git a/bundle/internal/tf/schema/resource_connection.go b/bundle/internal/tf/schema/resource_connection.go new file mode 100644 index 00000000..a249a539 --- /dev/null +++ b/bundle/internal/tf/schema/resource_connection.go @@ -0,0 +1,15 @@ +// Generated from Databricks Terraform provider schema. DO NOT EDIT. + +package schema + +type ResourceConnection struct { + Comment string `json:"comment,omitempty"` + ConnectionType string `json:"connection_type"` + Id string `json:"id,omitempty"` + MetastoreId string `json:"metastore_id,omitempty"` + Name string `json:"name"` + Options map[string]string `json:"options"` + Owner string `json:"owner,omitempty"` + Properties map[string]string `json:"properties,omitempty"` + ReadOnly bool `json:"read_only,omitempty"` +} diff --git a/bundle/internal/tf/schema/resource_instance_pool.go b/bundle/internal/tf/schema/resource_instance_pool.go index 2c322121..f524b3fc 100644 --- a/bundle/internal/tf/schema/resource_instance_pool.go +++ b/bundle/internal/tf/schema/resource_instance_pool.go @@ -26,6 +26,7 @@ type ResourceInstancePoolDiskSpec struct { type ResourceInstancePoolGcpAttributes struct { GcpAvailability string `json:"gcp_availability,omitempty"` + LocalSsdCount int `json:"local_ssd_count,omitempty"` } type ResourceInstancePoolInstancePoolFleetAttributesFleetOnDemandOption struct { diff --git a/bundle/internal/tf/schema/resource_job.go b/bundle/internal/tf/schema/resource_job.go index 77b681ee..50101400 100644 --- a/bundle/internal/tf/schema/resource_job.go +++ b/bundle/internal/tf/schema/resource_job.go @@ -124,6 +124,7 @@ type ResourceJobJobClusterNewClusterGcpAttributes struct { Availability string `json:"availability,omitempty"` BootDiskSize int `json:"boot_disk_size,omitempty"` GoogleServiceAccount string `json:"google_service_account,omitempty"` + LocalSsdCount int `json:"local_ssd_count,omitempty"` UsePreemptibleExecutors bool `json:"use_preemptible_executors,omitempty"` ZoneId string `json:"zone_id,omitempty"` } @@ -305,6 +306,7 @@ type ResourceJobNewClusterGcpAttributes struct { Availability string `json:"availability,omitempty"` BootDiskSize int `json:"boot_disk_size,omitempty"` GoogleServiceAccount string `json:"google_service_account,omitempty"` + LocalSsdCount int `json:"local_ssd_count,omitempty"` UsePreemptibleExecutors bool `json:"use_preemptible_executors,omitempty"` ZoneId string `json:"zone_id,omitempty"` } @@ -401,6 +403,11 @@ type ResourceJobNotificationSettings struct { NoAlertForSkippedRuns bool `json:"no_alert_for_skipped_runs,omitempty"` } +type ResourceJobParameter struct { + Default string `json:"default,omitempty"` + Name string `json:"name,omitempty"` +} + type ResourceJobPipelineTask struct { FullRefresh bool `json:"full_refresh,omitempty"` PipelineId string `json:"pipeline_id"` @@ -421,6 +428,11 @@ type ResourceJobRunAs struct { UserName string `json:"user_name,omitempty"` } +type ResourceJobRunJobTask struct { + JobId string `json:"job_id"` + JobParameters map[string]string `json:"job_parameters,omitempty"` +} + type ResourceJobSchedule struct { PauseStatus string `json:"pause_status,omitempty"` QuartzCronExpression string `json:"quartz_cron_expression"` @@ -573,6 +585,7 @@ type ResourceJobTaskNewClusterGcpAttributes struct { Availability string `json:"availability,omitempty"` BootDiskSize int `json:"boot_disk_size,omitempty"` GoogleServiceAccount string `json:"google_service_account,omitempty"` + LocalSsdCount int `json:"local_ssd_count,omitempty"` UsePreemptibleExecutors bool `json:"use_preemptible_executors,omitempty"` ZoneId string `json:"zone_id,omitempty"` } @@ -682,6 +695,11 @@ type ResourceJobTaskPythonWheelTask struct { Parameters []string `json:"parameters,omitempty"` } +type ResourceJobTaskRunJobTask struct { + JobId string `json:"job_id"` + JobParameters map[string]string `json:"job_parameters,omitempty"` +} + type ResourceJobTaskSparkJarTask struct { JarUri string `json:"jar_uri,omitempty"` MainClassName string `json:"main_class_name,omitempty"` @@ -760,6 +778,7 @@ type ResourceJobTask struct { NotificationSettings *ResourceJobTaskNotificationSettings `json:"notification_settings,omitempty"` PipelineTask *ResourceJobTaskPipelineTask `json:"pipeline_task,omitempty"` PythonWheelTask *ResourceJobTaskPythonWheelTask `json:"python_wheel_task,omitempty"` + RunJobTask *ResourceJobTaskRunJobTask `json:"run_job_task,omitempty"` SparkJarTask *ResourceJobTaskSparkJarTask `json:"spark_jar_task,omitempty"` SparkPythonTask *ResourceJobTaskSparkPythonTask `json:"spark_python_task,omitempty"` SparkSubmitTask *ResourceJobTaskSparkSubmitTask `json:"spark_submit_task,omitempty"` @@ -825,10 +844,12 @@ type ResourceJob struct { NewCluster *ResourceJobNewCluster `json:"new_cluster,omitempty"` NotebookTask *ResourceJobNotebookTask `json:"notebook_task,omitempty"` NotificationSettings *ResourceJobNotificationSettings `json:"notification_settings,omitempty"` + Parameter []ResourceJobParameter `json:"parameter,omitempty"` PipelineTask *ResourceJobPipelineTask `json:"pipeline_task,omitempty"` PythonWheelTask *ResourceJobPythonWheelTask `json:"python_wheel_task,omitempty"` Queue *ResourceJobQueue `json:"queue,omitempty"` RunAs *ResourceJobRunAs `json:"run_as,omitempty"` + RunJobTask *ResourceJobRunJobTask `json:"run_job_task,omitempty"` Schedule *ResourceJobSchedule `json:"schedule,omitempty"` SparkJarTask *ResourceJobSparkJarTask `json:"spark_jar_task,omitempty"` SparkPythonTask *ResourceJobSparkPythonTask `json:"spark_python_task,omitempty"` diff --git a/bundle/internal/tf/schema/resource_model_serving.go b/bundle/internal/tf/schema/resource_model_serving.go index b7ff88cc..cc5c3257 100644 --- a/bundle/internal/tf/schema/resource_model_serving.go +++ b/bundle/internal/tf/schema/resource_model_serving.go @@ -4,6 +4,7 @@ package schema type ResourceModelServingConfigServedModels struct { EnvironmentVars map[string]string `json:"environment_vars,omitempty"` + InstanceProfileArn string `json:"instance_profile_arn,omitempty"` ModelName string `json:"model_name"` ModelVersion string `json:"model_version"` Name string `json:"name,omitempty"` diff --git a/bundle/internal/tf/schema/resource_pipeline.go b/bundle/internal/tf/schema/resource_pipeline.go index 9e7f71b1..5c5de9a7 100644 --- a/bundle/internal/tf/schema/resource_pipeline.go +++ b/bundle/internal/tf/schema/resource_pipeline.go @@ -47,6 +47,7 @@ type ResourcePipelineClusterClusterLogConf struct { type ResourcePipelineClusterGcpAttributes struct { Availability string `json:"availability,omitempty"` GoogleServiceAccount string `json:"google_service_account,omitempty"` + LocalSsdCount int `json:"local_ssd_count,omitempty"` ZoneId string `json:"zone_id,omitempty"` } diff --git a/bundle/internal/tf/schema/root.go b/bundle/internal/tf/schema/root.go index 9cfe8491..3beb3007 100644 --- a/bundle/internal/tf/schema/root.go +++ b/bundle/internal/tf/schema/root.go @@ -24,7 +24,7 @@ func NewRoot() *Root { "required_providers": map[string]interface{}{ "databricks": map[string]interface{}{ "source": "databricks/databricks", - "version": ">= 1.0.0", + "version": "1.23.0", }, }, }, diff --git a/bundle/python/transform.go b/bundle/python/transform.go index c7938bab..6ec75a03 100644 --- a/bundle/python/transform.go +++ b/bundle/python/transform.go @@ -7,7 +7,6 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config/mutator" - "github.com/databricks/cli/bundle/libraries" "github.com/databricks/databricks-sdk-go/service/jobs" ) @@ -17,17 +16,28 @@ const NOTEBOOK_TEMPLATE = `# Databricks notebook source %pip install --force-reinstall {{.Whl}} {{end}} +try: + from importlib import metadata +except ImportError: # for Python<3.8 + import subprocess + import sys + + subprocess.check_call([sys.executable, "-m", "pip", "install", "importlib-metadata"]) + import importlib_metadata as metadata + from contextlib import redirect_stdout import io import sys sys.argv = [{{.Params}}] -import pkg_resources -_func = pkg_resources.load_entry_point("{{.Task.PackageName}}", "console_scripts", "{{.Task.EntryPoint}}") +entry = [ep for ep in metadata.distribution("{{.Task.PackageName}}").entry_points if ep.name == "{{.Task.EntryPoint}}"] f = io.StringIO() with redirect_stdout(f): - _func() + if entry: + entry[0].load()() + else: + raise ImportError("Entry point '{{.Task.EntryPoint}}' not found") s = f.getvalue() dbutils.notebook.exit(s) ` @@ -38,19 +48,44 @@ dbutils.notebook.exit(s) func TransformWheelTask() bundle.Mutator { return mutator.NewTrampoline( "python_wheel", - getTasks, - generateTemplateData, - cleanUpTask, + &pythonTrampoline{}, NOTEBOOK_TEMPLATE, ) } -func getTasks(b *bundle.Bundle) []*jobs.Task { - return libraries.FindAllWheelTasks(b) +type pythonTrampoline struct{} + +func (t *pythonTrampoline) CleanUp(task *jobs.Task) error { + task.PythonWheelTask = nil + task.Libraries = nil + + return nil } -func generateTemplateData(task *jobs.Task) (map[string]any, error) { - params, err := generateParameters(task.PythonWheelTask) +func (t *pythonTrampoline) GetTasks(b *bundle.Bundle) []mutator.TaskWithJobKey { + r := b.Config.Resources + result := make([]mutator.TaskWithJobKey, 0) + for k := range b.Config.Resources.Jobs { + tasks := r.Jobs[k].JobSettings.Tasks + for i := range tasks { + task := &tasks[i] + + // Keep only Python wheel tasks + if task.PythonWheelTask == nil { + continue + } + + result = append(result, mutator.TaskWithJobKey{ + JobKey: k, + Task: task, + }) + } + } + return result +} + +func (t *pythonTrampoline) GetTemplateData(task *jobs.Task) (map[string]any, error) { + params, err := t.generateParameters(task.PythonWheelTask) if err != nil { return nil, err } @@ -64,7 +99,7 @@ func generateTemplateData(task *jobs.Task) (map[string]any, error) { return data, nil } -func generateParameters(task *jobs.PythonWheelTask) (string, error) { +func (t *pythonTrampoline) generateParameters(task *jobs.PythonWheelTask) (string, error) { if task.Parameters != nil && task.NamedParameters != nil { return "", fmt.Errorf("not allowed to pass both paramaters and named_parameters") } @@ -78,8 +113,3 @@ func generateParameters(task *jobs.PythonWheelTask) (string, error) { } return strings.Join(params, ", "), nil } - -func cleanUpTask(task *jobs.Task) { - task.PythonWheelTask = nil - task.Libraries = nil -} diff --git a/bundle/python/transform_test.go b/bundle/python/transform_test.go index ada64e7a..fb2c23e4 100644 --- a/bundle/python/transform_test.go +++ b/bundle/python/transform_test.go @@ -1,8 +1,13 @@ package python import ( + "context" + "strings" "testing" + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/resources" "github.com/databricks/databricks-sdk-go/service/jobs" "github.com/stretchr/testify/require" ) @@ -11,9 +16,9 @@ type testCase struct { Actual []string Expected string } -type NamedParams map[string]string + type testCaseNamed struct { - Actual NamedParams + Actual map[string]string Expected string } @@ -26,32 +31,103 @@ var paramsTestCases []testCase = []testCase{ } var paramsTestCasesNamed []testCaseNamed = []testCaseNamed{ - {NamedParams{}, `"python"`}, - {NamedParams{"a": "1"}, `"python", "a=1"`}, - {NamedParams{"a": "1", "b": "2"}, `"python", "a=1", "b=2"`}, - {NamedParams{"data": `{"a": 1}`}, `"python", "data={\"a\": 1}"`}, + {map[string]string{}, `"python"`}, + {map[string]string{"a": "1"}, `"python", "a=1"`}, + {map[string]string{"a": "'1'"}, `"python", "a='1'"`}, + {map[string]string{"a": `"1"`}, `"python", "a=\"1\""`}, + {map[string]string{"a": "1", "b": "2"}, `"python", "a=1", "b=2"`}, + {map[string]string{"data": `{"a": 1}`}, `"python", "data={\"a\": 1}"`}, } func TestGenerateParameters(t *testing.T) { + trampoline := pythonTrampoline{} for _, c := range paramsTestCases { task := &jobs.PythonWheelTask{Parameters: c.Actual} - result, err := generateParameters(task) + result, err := trampoline.generateParameters(task) require.NoError(t, err) require.Equal(t, c.Expected, result) } } func TestGenerateNamedParameters(t *testing.T) { + trampoline := pythonTrampoline{} for _, c := range paramsTestCasesNamed { task := &jobs.PythonWheelTask{NamedParameters: c.Actual} - result, err := generateParameters(task) + result, err := trampoline.generateParameters(task) require.NoError(t, err) - require.Equal(t, c.Expected, result) + + // parameters order can be undetermenistic, so just check that they exist as expected + require.ElementsMatch(t, strings.Split(c.Expected, ","), strings.Split(result, ",")) } } func TestGenerateBoth(t *testing.T) { + trampoline := pythonTrampoline{} task := &jobs.PythonWheelTask{NamedParameters: map[string]string{"a": "1"}, Parameters: []string{"b"}} - _, err := generateParameters(task) + _, err := trampoline.generateParameters(task) require.Error(t, err) + require.ErrorContains(t, err, "not allowed to pass both paramaters and named_parameters") +} + +func TestTransformFiltersWheelTasksOnly(t *testing.T) { + trampoline := pythonTrampoline{} + bundle := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "job1": { + JobSettings: &jobs.JobSettings{ + Tasks: []jobs.Task{ + { + TaskKey: "key1", + PythonWheelTask: &jobs.PythonWheelTask{}, + }, + { + TaskKey: "key2", + NotebookTask: &jobs.NotebookTask{}, + }, + }, + }, + }, + }, + }, + }, + } + + tasks := trampoline.GetTasks(bundle) + require.Len(t, tasks, 1) + require.Equal(t, "job1", tasks[0].JobKey) + require.Equal(t, "key1", tasks[0].Task.TaskKey) + require.NotNil(t, tasks[0].Task.PythonWheelTask) +} + +func TestNoPanicWithNoPythonWheelTasks(t *testing.T) { + tmpDir := t.TempDir() + b := &bundle.Bundle{ + Config: config.Root{ + Path: tmpDir, + Bundle: config.Bundle{ + Target: "development", + }, + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "test": { + Paths: resources.Paths{ + ConfigFilePath: tmpDir, + }, + JobSettings: &jobs.JobSettings{ + Tasks: []jobs.Task{ + { + TaskKey: "notebook_task", + NotebookTask: &jobs.NotebookTask{}}, + }, + }, + }, + }, + }, + }, + } + trampoline := TransformWheelTask() + err := bundle.Apply(context.Background(), b, trampoline) + require.NoError(t, err) } diff --git a/cmd/root/bundle.go b/cmd/root/bundle.go index fe97fbf2..10cce67a 100644 --- a/cmd/root/bundle.go +++ b/cmd/root/bundle.go @@ -43,7 +43,7 @@ func getTarget(cmd *cobra.Command) (value string) { return target } -func GetProfile(cmd *cobra.Command) (value string) { +func getProfile(cmd *cobra.Command) (value string) { // The command line flag takes precedence. flag := cmd.Flag("profile") if flag != nil { @@ -70,7 +70,7 @@ func loadBundle(cmd *cobra.Command, args []string, load func(ctx context.Context return nil, nil } - profile := GetProfile(cmd) + profile := getProfile(cmd) if profile != "" { b.Config.Workspace.Profile = profile } diff --git a/libs/auth/service_principal.go b/libs/auth/service_principal.go index 58fcc6a7..a6740b50 100644 --- a/libs/auth/service_principal.go +++ b/libs/auth/service_principal.go @@ -4,13 +4,17 @@ import ( "context" "github.com/databricks/databricks-sdk-go" + "github.com/databricks/databricks-sdk-go/apierr" ) // Determines whether a given user id is a service principal. // This function uses a heuristic: if no user exists with this id, we assume // it's a service principal. Unfortunately, the standard service principal API is too // slow for our purposes. -func IsServicePrincipal(ctx context.Context, ws *databricks.WorkspaceClient, userId string) bool { +func IsServicePrincipal(ctx context.Context, ws *databricks.WorkspaceClient, userId string) (bool, error) { _, err := ws.Users.GetById(ctx, userId) - return err != nil + if apierr.IsMissing(err) { + return true, nil + } + return false, err } diff --git a/libs/template/helpers.go b/libs/template/helpers.go index b8f2fe45..f947d9ba 100644 --- a/libs/template/helpers.go +++ b/libs/template/helpers.go @@ -104,7 +104,10 @@ func loadHelpers(ctx context.Context) template.FuncMap { return false, err } } - result := auth.IsServicePrincipal(ctx, w, user.Id) + result, err := auth.IsServicePrincipal(ctx, w, user.Id) + if err != nil { + return false, err + } is_service_principal = &result return result, nil }, diff --git a/libs/template/materialize.go b/libs/template/materialize.go index 5422160d..8517858f 100644 --- a/libs/template/materialize.go +++ b/libs/template/materialize.go @@ -7,6 +7,8 @@ import ( "os" "path" "path/filepath" + + "github.com/databricks/cli/libs/cmdio" ) const libraryDirName = "library" @@ -80,7 +82,7 @@ func Materialize(ctx context.Context, configFilePath, templateRoot, outputDir st if err != nil { return err } - println("✨ Successfully initialized template") + cmdio.LogString(ctx, "✨ Successfully initialized template") return nil }