Merge remote-tracking branch 'origin/main' into mutator-with-wrappers

kartikgupta-db 2023-08-31 12:29:42 +02:00
commit 8d3381e0e0
No known key found for this signature in database
GPG Key ID: 6AD5FA11FACDEA39
28 changed files with 351 additions and 108 deletions

View File

@@ -26,10 +26,9 @@ jobs:
run: git fetch --prune --unshallow
- name: Setup Go
uses: actions/setup-go@v3
uses: actions/setup-go@v4
with:
go-version: 1.21.0
cache: true
- name: Set go env
run: |
@@ -54,9 +53,9 @@ jobs:
uses: actions/checkout@v3
- name: Setup Go
uses: actions/setup-go@v3
uses: actions/setup-go@v4
with:
go-version: 1.21
go-version: 1.21.0
# No need to download cached dependencies when running gofmt.
cache: false

View File

@@ -19,27 +19,10 @@ jobs:
run: git fetch --prune --unshallow
- name: Setup Go
id: go
uses: actions/setup-go@v3
uses: actions/setup-go@v4
with:
go-version: 1.21.0
- name: Locate cache paths
id: cache
run: |
echo "GOMODCACHE=$(go env GOMODCACHE)" >> $GITHUB_OUTPUT
echo "GOCACHE=$(go env GOCACHE)" >> $GITHUB_OUTPUT
# Note: use custom caching because the step below performs a cross-platform
# build through goreleaser and we don't want to share a cache with the test builds.
- name: Setup caching
uses: actions/cache@v3
with:
path: |
${{ steps.cache.outputs.GOMODCACHE }}
${{ steps.cache.outputs.GOCACHE }}
key: release-${{ runner.os }}-go-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', '.goreleaser.yaml') }}
- name: Hide snapshot tag to outsmart GoReleaser
run: git tag -d snapshot || true

View File

@@ -18,27 +18,10 @@ jobs:
run: git fetch --prune --unshallow
- name: Setup Go
id: go
uses: actions/setup-go@v3
uses: actions/setup-go@v4
with:
go-version: 1.21.0
- name: Locate cache paths
id: cache
run: |
echo "GOMODCACHE=$(go env GOMODCACHE)" >> $GITHUB_OUTPUT
echo "GOCACHE=$(go env GOCACHE)" >> $GITHUB_OUTPUT
# Note: use custom caching because the step below performs a cross-platform
# build through goreleaser and we don't want to share a cache with the test builds.
- name: Setup caching
uses: actions/cache@v3
with:
path: |
${{ steps.cache.outputs.GOMODCACHE }}
${{ steps.cache.outputs.GOCACHE }}
key: release-${{ runner.os }}-go-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', '.goreleaser.yaml') }}
- name: Run GoReleaser
uses: goreleaser/goreleaser-action@v4
with:

View File

@@ -1,5 +1,23 @@
# Version changelog
## 0.203.3
Bundles:
* Support cluster overrides with cluster_key and compute_key ([#696](https://github.com/databricks/cli/pull/696)).
* Allow referencing local Python wheels without artifacts section defined ([#703](https://github.com/databricks/cli/pull/703)).
* Fixed --environment flag ([#705](https://github.com/databricks/cli/pull/705)).
* Correctly identify local paths in libraries section ([#702](https://github.com/databricks/cli/pull/702)).
* Fixed path joining in FindFilesWithSuffixInPath ([#704](https://github.com/databricks/cli/pull/704)).
* Added a transformation mutator for Python wheel tasks so they work on DBR <13.1 ([#635](https://github.com/databricks/cli/pull/635)).
Internal:
* Add a foundation for built-in templates ([#685](https://github.com/databricks/cli/pull/685)).
* Test transform when no Python wheel tasks defined ([#714](https://github.com/databricks/cli/pull/714)).
* Pin Terraform binary version to 1.5.5 ([#715](https://github.com/databricks/cli/pull/715)).
* Cleanup after "Add a foundation for built-in templates" ([#707](https://github.com/databricks/cli/pull/707)).
* Filter down to Python wheel tasks only for trampoline ([#712](https://github.com/databricks/cli/pull/712)).
* Update Terraform provider schema structs from 1.23.0 ([#713](https://github.com/databricks/cli/pull/713)).
## 0.203.2
CLI:

View File

@@ -160,7 +160,10 @@ func (m *processTargetMode) Apply(ctx context.Context, b *bundle.Bundle) error {
}
return transformDevelopmentMode(b)
case config.Production:
isPrincipal := auth.IsServicePrincipal(ctx, b.WorkspaceClient(), b.Config.Workspace.CurrentUser.Id)
isPrincipal, err := auth.IsServicePrincipal(ctx, b.WorkspaceClient(), b.Config.Workspace.CurrentUser.Id)
if err != nil {
return err
}
return validateProductionMode(ctx, b, isPrincipal)
case "":
// No action

View File

@@ -12,26 +12,28 @@ import (
"github.com/databricks/databricks-sdk-go/service/jobs"
)
type fnTemplateData func(task *jobs.Task) (map[string]any, error)
type fnCleanUp func(task *jobs.Task)
type fnTasks func(b *bundle.Bundle) []*jobs.Task
type TaskWithJobKey struct {
Task *jobs.Task
JobKey string
}
type TrampolineFunctions interface {
GetTemplateData(task *jobs.Task) (map[string]any, error)
GetTasks(b *bundle.Bundle) []TaskWithJobKey
CleanUp(task *jobs.Task) error
}
type trampoline struct {
name string
getTasks fnTasks
templateData fnTemplateData
cleanUp fnCleanUp
template string
name string
functions TrampolineFunctions
template string
}
func NewTrampoline(
name string,
tasks fnTasks,
templateData fnTemplateData,
cleanUp fnCleanUp,
functions TrampolineFunctions,
template string,
) *trampoline {
return &trampoline{name, tasks, templateData, cleanUp, template}
return &trampoline{name, functions, template}
}
func (m *trampoline) Name() string {
@@ -39,7 +41,7 @@ func (m *trampoline) Name() string {
}
func (m *trampoline) Apply(ctx context.Context, b *bundle.Bundle) error {
tasks := m.getTasks(b)
tasks := m.functions.GetTasks(b)
for _, task := range tasks {
err := m.generateNotebookWrapper(b, task)
if err != nil {
@@ -49,13 +51,13 @@ func (m *trampoline) Apply(ctx context.Context, b *bundle.Bundle) error {
return nil
}
func (m *trampoline) generateNotebookWrapper(b *bundle.Bundle, task *jobs.Task) error {
func (m *trampoline) generateNotebookWrapper(b *bundle.Bundle, task TaskWithJobKey) error {
internalDir, err := b.InternalDir()
if err != nil {
return err
}
notebookName := fmt.Sprintf("notebook_%s", task.TaskKey)
notebookName := fmt.Sprintf("notebook_%s_%s", task.JobKey, task.Task.TaskKey)
localNotebookPath := filepath.Join(internalDir, notebookName+".py")
err = os.MkdirAll(filepath.Dir(localNotebookPath), 0755)
@@ -69,7 +71,7 @@ func (m *trampoline) generateNotebookWrapper(b *bundle.Bundle, task *jobs.Task)
}
defer f.Close()
data, err := m.templateData(task)
data, err := m.functions.GetTemplateData(task.Task)
if err != nil {
return err
}
@@ -84,10 +86,13 @@ func (m *trampoline) generateNotebookWrapper(b *bundle.Bundle, task *jobs.Task)
return err
}
m.cleanUp(task)
err = m.functions.CleanUp(task.Task)
if err != nil {
return err
}
remotePath := path.Join(b.Config.Workspace.FilesPath, filepath.ToSlash(internalDirRel), notebookName)
task.NotebookTask = &jobs.NotebookTask{
task.Task.NotebookTask = &jobs.NotebookTask{
NotebookPath: remotePath,
}

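For orientation, here is a minimal sketch (not part of the commit) of what an implementation of the new TrampolineFunctions interface looks like and how it is handed to NewTrampoline. The echoTrampoline name and template string are invented for illustration; the test below and the python_wheel transform later in this diff are the real implementations.

package example

import (
	"github.com/databricks/cli/bundle"
	"github.com/databricks/cli/bundle/config/mutator"
	"github.com/databricks/databricks-sdk-go/service/jobs"
)

// echoTrampoline wraps every task in a notebook rendered from the template.
type echoTrampoline struct{}

// GetTasks decides which tasks receive a notebook wrapper; here, all of them.
func (e *echoTrampoline) GetTasks(b *bundle.Bundle) []mutator.TaskWithJobKey {
	var out []mutator.TaskWithJobKey
	for key, job := range b.Config.Resources.Jobs {
		if job.JobSettings == nil {
			continue
		}
		for i := range job.Tasks {
			out = append(out, mutator.TaskWithJobKey{JobKey: key, Task: &job.Tasks[i]})
		}
	}
	return out
}

// GetTemplateData supplies the values rendered into the notebook template.
func (e *echoTrampoline) GetTemplateData(task *jobs.Task) (map[string]any, error) {
	return map[string]any{"TaskKey": task.TaskKey}, nil
}

// CleanUp strips whatever the wrapper replaces; nothing to do for this sketch.
func (e *echoTrampoline) CleanUp(task *jobs.Task) error { return nil }

// Applied like any other mutator:
//   err := bundle.Apply(ctx, b, mutator.NewTrampoline("echo", &echoTrampoline{}, "# {{.TaskKey}}"))
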
View File

@@ -14,16 +14,21 @@ import (
"github.com/stretchr/testify/require"
)
func getTasks(b *bundle.Bundle) []*jobs.Task {
tasks := make([]*jobs.Task, 0)
type functions struct{}
func (f *functions) GetTasks(b *bundle.Bundle) []TaskWithJobKey {
tasks := make([]TaskWithJobKey, 0)
for k := range b.Config.Resources.Jobs["test"].Tasks {
tasks = append(tasks, &b.Config.Resources.Jobs["test"].Tasks[k])
tasks = append(tasks, TaskWithJobKey{
JobKey: "test",
Task: &b.Config.Resources.Jobs["test"].Tasks[k],
})
}
return tasks
}
func templateData(task *jobs.Task) (map[string]any, error) {
func (f *functions) GetTemplateData(task *jobs.Task) (map[string]any, error) {
if task.PythonWheelTask == nil {
return nil, fmt.Errorf("PythonWheelTask cannot be nil")
}
@@ -33,8 +38,9 @@ func templateData(task *jobs.Task) (map[string]any, error) {
return data, nil
}
func cleanUp(task *jobs.Task) {
func (f *functions) CleanUp(task *jobs.Task) error {
task.PythonWheelTask = nil
return nil
}
func TestGenerateTrampoline(t *testing.T) {
@@ -71,13 +77,14 @@ func TestGenerateTrampoline(t *testing.T) {
}
ctx := context.Background()
trampoline := NewTrampoline("test_trampoline", getTasks, templateData, cleanUp, "Hello from {{.MyName}}")
funcs := functions{}
trampoline := NewTrampoline("test_trampoline", &funcs, "Hello from {{.MyName}}")
err := bundle.Apply(ctx, b, trampoline)
require.NoError(t, err)
dir, err := b.InternalDir()
require.NoError(t, err)
filename := filepath.Join(dir, "notebook_to_trampoline.py")
filename := filepath.Join(dir, "notebook_test_to_trampoline.py")
bytes, err := os.ReadFile(filename)
require.NoError(t, err)
@@ -85,6 +92,6 @@ func TestGenerateTrampoline(t *testing.T) {
require.Equal(t, "Hello from Trampoline", string(bytes))
task := b.Config.Resources.Jobs["test"].Tasks[0]
require.Equal(t, task.NotebookTask.NotebookPath, ".databricks/bundle/development/.internal/notebook_to_trampoline")
require.Equal(t, task.NotebookTask.NotebookPath, ".databricks/bundle/development/.internal/notebook_test_to_trampoline")
require.Nil(t, task.PythonWheelTask)
}

View File

@@ -55,10 +55,10 @@ func (m *initialize) findExecPath(ctx context.Context, b *bundle.Bundle, tf *con
}
// Download Terraform to private bin directory.
installer := &releases.LatestVersion{
Product: product.Terraform,
Constraints: version.MustConstraints(version.NewConstraint("<=1.5.5")),
InstallDir: binDir,
installer := &releases.ExactVersion{
Product: product.Terraform,
Version: version.Must(version.NewVersion("1.5.5")),
InstallDir: binDir,
}
execPath, err = installer.Install(ctx)
if err != nil {

View File

@@ -8,6 +8,7 @@ import (
"strings"
"text/template"
schemapkg "github.com/databricks/cli/bundle/internal/tf/codegen/schema"
tfjson "github.com/hashicorp/terraform-json"
)
@@ -32,6 +33,23 @@ func (c *collection) Generate(path string) error {
return tmpl.Execute(f, c)
}
type root struct {
OutputFile string
ProviderVersion string
}
func (r *root) Generate(path string) error {
tmpl := template.Must(template.ParseFiles(fmt.Sprintf("./templates/%s.tmpl", r.OutputFile)))
f, err := os.Create(filepath.Join(path, r.OutputFile))
if err != nil {
return err
}
defer f.Close()
return tmpl.Execute(f, r)
}
func Run(ctx context.Context, schema *tfjson.ProviderSchema, path string) error {
// Generate types for resources.
var resources []*namedBlock
@@ -105,5 +123,17 @@ func Run(ctx context.Context, schema *tfjson.ProviderSchema, path string) error
}
}
// Generate root.go
{
r := &root{
OutputFile: "root.go",
ProviderVersion: schemapkg.ProviderVersion,
}
err := r.Generate(path)
if err != nil {
return err
}
}
return nil
}

View File

@@ -8,6 +8,7 @@ import (
"os"
"path/filepath"
"github.com/hashicorp/go-version"
"github.com/hashicorp/hc-install/product"
"github.com/hashicorp/hc-install/releases"
"github.com/hashicorp/terraform-exec/tfexec"
@@ -19,7 +20,7 @@ func (s *Schema) writeTerraformBlock(_ context.Context) error {
"required_providers": map[string]interface{}{
"databricks": map[string]interface{}{
"source": "databricks/databricks",
"version": ">= 1.0.0",
"version": ProviderVersion,
},
},
},
@@ -40,9 +41,10 @@ func (s *Schema) installTerraform(ctx context.Context) (path string, err error)
return
}
installer := &releases.LatestVersion{
InstallDir: installDir,
installer := &releases.ExactVersion{
Product: product.Terraform,
Version: version.Must(version.NewVersion("1.5.5")),
InstallDir: installDir,
}
installer.SetLogger(log.Default())

View File

@@ -0,0 +1,3 @@
package schema
const ProviderVersion = "1.23.0"

View File

@@ -0,0 +1,32 @@
package schema
type Providers struct {
Databricks *Config `json:"databricks,omitempty"`
}
func NewProviders() *Providers {
return &Providers{
Databricks: &Config{},
}
}
type Root struct {
Terraform map[string]any `json:"terraform"`
Provider *Providers `json:"provider,omitempty"`
Data *DataSources `json:"data,omitempty"`
Resource *Resources `json:"resource,omitempty"`
}
func NewRoot() *Root {
return &Root{
Terraform: map[string]interface{}{
"required_providers": map[string]interface{}{
"databricks": map[string]interface{}{
"source": "databricks/databricks",
"version": "1.23.0",
},
},
},
}
}

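As a quick illustration (not part of the commit), marshaling NewRoot shows the Terraform JSON these structs produce; the import path is assumed from the generated schema package's location.

package main

import (
	"encoding/json"
	"fmt"

	"github.com/databricks/cli/bundle/internal/tf/schema"
)

func main() {
	out, err := json.MarshalIndent(schema.NewRoot(), "", "  ")
	if err != nil {
		panic(err)
	}
	fmt.Println(string(out))
	// Prints (Provider, Data, and Resource are omitted while nil):
	// {
	//   "terraform": {
	//     "required_providers": {
	//       "databricks": {
	//         "source": "databricks/databricks",
	//         "version": "1.23.0"
	//       }
	//     }
	//   }
	// }
}
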
View File

@@ -90,6 +90,7 @@ type DataSourceClusterClusterInfoGcpAttributes struct {
Availability string `json:"availability,omitempty"`
BootDiskSize int `json:"boot_disk_size,omitempty"`
GoogleServiceAccount string `json:"google_service_account,omitempty"`
LocalSsdCount int `json:"local_ssd_count,omitempty"`
UsePreemptibleExecutors bool `json:"use_preemptible_executors,omitempty"`
ZoneId string `json:"zone_id,omitempty"`
}

View File

@@ -26,6 +26,7 @@ type DataSourceInstancePoolPoolInfoDiskSpec struct {
type DataSourceInstancePoolPoolInfoGcpAttributes struct {
GcpAvailability string `json:"gcp_availability,omitempty"`
LocalSsdCount int `json:"local_ssd_count,omitempty"`
}
type DataSourceInstancePoolPoolInfoInstancePoolFleetAttributesFleetOnDemandOption struct {

View File

@@ -124,6 +124,7 @@ type DataSourceJobJobSettingsSettingsJobClusterNewClusterGcpAttributes struct {
Availability string `json:"availability,omitempty"`
BootDiskSize int `json:"boot_disk_size,omitempty"`
GoogleServiceAccount string `json:"google_service_account,omitempty"`
LocalSsdCount int `json:"local_ssd_count,omitempty"`
UsePreemptibleExecutors bool `json:"use_preemptible_executors,omitempty"`
ZoneId string `json:"zone_id,omitempty"`
}
@@ -305,6 +306,7 @@ type DataSourceJobJobSettingsSettingsNewClusterGcpAttributes struct {
Availability string `json:"availability,omitempty"`
BootDiskSize int `json:"boot_disk_size,omitempty"`
GoogleServiceAccount string `json:"google_service_account,omitempty"`
LocalSsdCount int `json:"local_ssd_count,omitempty"`
UsePreemptibleExecutors bool `json:"use_preemptible_executors,omitempty"`
ZoneId string `json:"zone_id,omitempty"`
}
@@ -401,6 +403,11 @@ type DataSourceJobJobSettingsSettingsNotificationSettings struct {
NoAlertForSkippedRuns bool `json:"no_alert_for_skipped_runs,omitempty"`
}
type DataSourceJobJobSettingsSettingsParameter struct {
Default string `json:"default,omitempty"`
Name string `json:"name,omitempty"`
}
type DataSourceJobJobSettingsSettingsPipelineTask struct {
FullRefresh bool `json:"full_refresh,omitempty"`
PipelineId string `json:"pipeline_id"`
@@ -421,6 +428,11 @@ type DataSourceJobJobSettingsSettingsRunAs struct {
UserName string `json:"user_name,omitempty"`
}
type DataSourceJobJobSettingsSettingsRunJobTask struct {
JobId string `json:"job_id"`
JobParameters map[string]string `json:"job_parameters,omitempty"`
}
type DataSourceJobJobSettingsSettingsSchedule struct {
PauseStatus string `json:"pause_status,omitempty"`
QuartzCronExpression string `json:"quartz_cron_expression"`
@@ -573,6 +585,7 @@ type DataSourceJobJobSettingsSettingsTaskNewClusterGcpAttributes struct {
Availability string `json:"availability,omitempty"`
BootDiskSize int `json:"boot_disk_size,omitempty"`
GoogleServiceAccount string `json:"google_service_account,omitempty"`
LocalSsdCount int `json:"local_ssd_count,omitempty"`
UsePreemptibleExecutors bool `json:"use_preemptible_executors,omitempty"`
ZoneId string `json:"zone_id,omitempty"`
}
@@ -682,6 +695,11 @@ type DataSourceJobJobSettingsSettingsTaskPythonWheelTask struct {
Parameters []string `json:"parameters,omitempty"`
}
type DataSourceJobJobSettingsSettingsTaskRunJobTask struct {
JobId string `json:"job_id"`
JobParameters map[string]string `json:"job_parameters,omitempty"`
}
type DataSourceJobJobSettingsSettingsTaskSparkJarTask struct {
JarUri string `json:"jar_uri,omitempty"`
MainClassName string `json:"main_class_name,omitempty"`
@@ -760,6 +778,7 @@ type DataSourceJobJobSettingsSettingsTask struct {
NotificationSettings *DataSourceJobJobSettingsSettingsTaskNotificationSettings `json:"notification_settings,omitempty"`
PipelineTask *DataSourceJobJobSettingsSettingsTaskPipelineTask `json:"pipeline_task,omitempty"`
PythonWheelTask *DataSourceJobJobSettingsSettingsTaskPythonWheelTask `json:"python_wheel_task,omitempty"`
RunJobTask *DataSourceJobJobSettingsSettingsTaskRunJobTask `json:"run_job_task,omitempty"`
SparkJarTask *DataSourceJobJobSettingsSettingsTaskSparkJarTask `json:"spark_jar_task,omitempty"`
SparkPythonTask *DataSourceJobJobSettingsSettingsTaskSparkPythonTask `json:"spark_python_task,omitempty"`
SparkSubmitTask *DataSourceJobJobSettingsSettingsTaskSparkSubmitTask `json:"spark_submit_task,omitempty"`
@@ -821,10 +840,12 @@ type DataSourceJobJobSettingsSettings struct {
NewCluster *DataSourceJobJobSettingsSettingsNewCluster `json:"new_cluster,omitempty"`
NotebookTask *DataSourceJobJobSettingsSettingsNotebookTask `json:"notebook_task,omitempty"`
NotificationSettings *DataSourceJobJobSettingsSettingsNotificationSettings `json:"notification_settings,omitempty"`
Parameter []DataSourceJobJobSettingsSettingsParameter `json:"parameter,omitempty"`
PipelineTask *DataSourceJobJobSettingsSettingsPipelineTask `json:"pipeline_task,omitempty"`
PythonWheelTask *DataSourceJobJobSettingsSettingsPythonWheelTask `json:"python_wheel_task,omitempty"`
Queue *DataSourceJobJobSettingsSettingsQueue `json:"queue,omitempty"`
RunAs *DataSourceJobJobSettingsSettingsRunAs `json:"run_as,omitempty"`
RunJobTask *DataSourceJobJobSettingsSettingsRunJobTask `json:"run_job_task,omitempty"`
Schedule *DataSourceJobJobSettingsSettingsSchedule `json:"schedule,omitempty"`
SparkJarTask *DataSourceJobJobSettingsSettingsSparkJarTask `json:"spark_jar_task,omitempty"`
SparkPythonTask *DataSourceJobJobSettingsSettingsSparkPythonTask `json:"spark_python_task,omitempty"`

View File

@@ -68,6 +68,7 @@ type ResourceClusterGcpAttributes struct {
Availability string `json:"availability,omitempty"`
BootDiskSize int `json:"boot_disk_size,omitempty"`
GoogleServiceAccount string `json:"google_service_account,omitempty"`
LocalSsdCount int `json:"local_ssd_count,omitempty"`
UsePreemptibleExecutors bool `json:"use_preemptible_executors,omitempty"`
ZoneId string `json:"zone_id,omitempty"`
}

View File

@@ -0,0 +1,15 @@
// Generated from Databricks Terraform provider schema. DO NOT EDIT.
package schema
type ResourceConnection struct {
Comment string `json:"comment,omitempty"`
ConnectionType string `json:"connection_type"`
Id string `json:"id,omitempty"`
MetastoreId string `json:"metastore_id,omitempty"`
Name string `json:"name"`
Options map[string]string `json:"options"`
Owner string `json:"owner,omitempty"`
Properties map[string]string `json:"properties,omitempty"`
ReadOnly bool `json:"read_only,omitempty"`
}

View File

@@ -26,6 +26,7 @@ type ResourceInstancePoolDiskSpec struct {
type ResourceInstancePoolGcpAttributes struct {
GcpAvailability string `json:"gcp_availability,omitempty"`
LocalSsdCount int `json:"local_ssd_count,omitempty"`
}
type ResourceInstancePoolInstancePoolFleetAttributesFleetOnDemandOption struct {

View File

@@ -124,6 +124,7 @@ type ResourceJobJobClusterNewClusterGcpAttributes struct {
Availability string `json:"availability,omitempty"`
BootDiskSize int `json:"boot_disk_size,omitempty"`
GoogleServiceAccount string `json:"google_service_account,omitempty"`
LocalSsdCount int `json:"local_ssd_count,omitempty"`
UsePreemptibleExecutors bool `json:"use_preemptible_executors,omitempty"`
ZoneId string `json:"zone_id,omitempty"`
}
@@ -305,6 +306,7 @@ type ResourceJobNewClusterGcpAttributes struct {
Availability string `json:"availability,omitempty"`
BootDiskSize int `json:"boot_disk_size,omitempty"`
GoogleServiceAccount string `json:"google_service_account,omitempty"`
LocalSsdCount int `json:"local_ssd_count,omitempty"`
UsePreemptibleExecutors bool `json:"use_preemptible_executors,omitempty"`
ZoneId string `json:"zone_id,omitempty"`
}
@@ -401,6 +403,11 @@ type ResourceJobNotificationSettings struct {
NoAlertForSkippedRuns bool `json:"no_alert_for_skipped_runs,omitempty"`
}
type ResourceJobParameter struct {
Default string `json:"default,omitempty"`
Name string `json:"name,omitempty"`
}
type ResourceJobPipelineTask struct {
FullRefresh bool `json:"full_refresh,omitempty"`
PipelineId string `json:"pipeline_id"`
@@ -421,6 +428,11 @@ type ResourceJobRunAs struct {
UserName string `json:"user_name,omitempty"`
}
type ResourceJobRunJobTask struct {
JobId string `json:"job_id"`
JobParameters map[string]string `json:"job_parameters,omitempty"`
}
type ResourceJobSchedule struct {
PauseStatus string `json:"pause_status,omitempty"`
QuartzCronExpression string `json:"quartz_cron_expression"`
@@ -573,6 +585,7 @@ type ResourceJobTaskNewClusterGcpAttributes struct {
Availability string `json:"availability,omitempty"`
BootDiskSize int `json:"boot_disk_size,omitempty"`
GoogleServiceAccount string `json:"google_service_account,omitempty"`
LocalSsdCount int `json:"local_ssd_count,omitempty"`
UsePreemptibleExecutors bool `json:"use_preemptible_executors,omitempty"`
ZoneId string `json:"zone_id,omitempty"`
}
@@ -682,6 +695,11 @@ type ResourceJobTaskPythonWheelTask struct {
Parameters []string `json:"parameters,omitempty"`
}
type ResourceJobTaskRunJobTask struct {
JobId string `json:"job_id"`
JobParameters map[string]string `json:"job_parameters,omitempty"`
}
type ResourceJobTaskSparkJarTask struct {
JarUri string `json:"jar_uri,omitempty"`
MainClassName string `json:"main_class_name,omitempty"`
@@ -760,6 +778,7 @@ type ResourceJobTask struct {
NotificationSettings *ResourceJobTaskNotificationSettings `json:"notification_settings,omitempty"`
PipelineTask *ResourceJobTaskPipelineTask `json:"pipeline_task,omitempty"`
PythonWheelTask *ResourceJobTaskPythonWheelTask `json:"python_wheel_task,omitempty"`
RunJobTask *ResourceJobTaskRunJobTask `json:"run_job_task,omitempty"`
SparkJarTask *ResourceJobTaskSparkJarTask `json:"spark_jar_task,omitempty"`
SparkPythonTask *ResourceJobTaskSparkPythonTask `json:"spark_python_task,omitempty"`
SparkSubmitTask *ResourceJobTaskSparkSubmitTask `json:"spark_submit_task,omitempty"`
@@ -825,10 +844,12 @@ type ResourceJob struct {
NewCluster *ResourceJobNewCluster `json:"new_cluster,omitempty"`
NotebookTask *ResourceJobNotebookTask `json:"notebook_task,omitempty"`
NotificationSettings *ResourceJobNotificationSettings `json:"notification_settings,omitempty"`
Parameter []ResourceJobParameter `json:"parameter,omitempty"`
PipelineTask *ResourceJobPipelineTask `json:"pipeline_task,omitempty"`
PythonWheelTask *ResourceJobPythonWheelTask `json:"python_wheel_task,omitempty"`
Queue *ResourceJobQueue `json:"queue,omitempty"`
RunAs *ResourceJobRunAs `json:"run_as,omitempty"`
RunJobTask *ResourceJobRunJobTask `json:"run_job_task,omitempty"`
Schedule *ResourceJobSchedule `json:"schedule,omitempty"`
SparkJarTask *ResourceJobSparkJarTask `json:"spark_jar_task,omitempty"`
SparkPythonTask *ResourceJobSparkPythonTask `json:"spark_python_task,omitempty"`

View File

@@ -4,6 +4,7 @@ package schema
type ResourceModelServingConfigServedModels struct {
EnvironmentVars map[string]string `json:"environment_vars,omitempty"`
InstanceProfileArn string `json:"instance_profile_arn,omitempty"`
ModelName string `json:"model_name"`
ModelVersion string `json:"model_version"`
Name string `json:"name,omitempty"`

View File

@@ -47,6 +47,7 @@ type ResourcePipelineClusterClusterLogConf struct {
type ResourcePipelineClusterGcpAttributes struct {
Availability string `json:"availability,omitempty"`
GoogleServiceAccount string `json:"google_service_account,omitempty"`
LocalSsdCount int `json:"local_ssd_count,omitempty"`
ZoneId string `json:"zone_id,omitempty"`
}

View File

@@ -24,7 +24,7 @@ func NewRoot() *Root {
"required_providers": map[string]interface{}{
"databricks": map[string]interface{}{
"source": "databricks/databricks",
"version": ">= 1.0.0",
"version": "1.23.0",
},
},
},

View File

@@ -7,7 +7,6 @@ import (
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config/mutator"
"github.com/databricks/cli/bundle/libraries"
"github.com/databricks/databricks-sdk-go/service/jobs"
)
@@ -17,17 +16,28 @@ const NOTEBOOK_TEMPLATE = `# Databricks notebook source
%pip install --force-reinstall {{.Whl}}
{{end}}
try:
from importlib import metadata
except ImportError: # for Python<3.8
import subprocess
import sys
subprocess.check_call([sys.executable, "-m", "pip", "install", "importlib-metadata"])
import importlib_metadata as metadata
from contextlib import redirect_stdout
import io
import sys
sys.argv = [{{.Params}}]
import pkg_resources
_func = pkg_resources.load_entry_point("{{.Task.PackageName}}", "console_scripts", "{{.Task.EntryPoint}}")
entry = [ep for ep in metadata.distribution("{{.Task.PackageName}}").entry_points if ep.name == "{{.Task.EntryPoint}}"]
f = io.StringIO()
with redirect_stdout(f):
_func()
if entry:
entry[0].load()()
else:
raise ImportError("Entry point '{{.Task.EntryPoint}}' not found")
s = f.getvalue()
dbutils.notebook.exit(s)
`
@@ -38,19 +48,44 @@ dbutils.notebook.exit(s)
func TransformWheelTask() bundle.Mutator {
return mutator.NewTrampoline(
"python_wheel",
getTasks,
generateTemplateData,
cleanUpTask,
&pythonTrampoline{},
NOTEBOOK_TEMPLATE,
)
}
func getTasks(b *bundle.Bundle) []*jobs.Task {
return libraries.FindAllWheelTasks(b)
type pythonTrampoline struct{}
func (t *pythonTrampoline) CleanUp(task *jobs.Task) error {
task.PythonWheelTask = nil
task.Libraries = nil
return nil
}
func generateTemplateData(task *jobs.Task) (map[string]any, error) {
params, err := generateParameters(task.PythonWheelTask)
func (t *pythonTrampoline) GetTasks(b *bundle.Bundle) []mutator.TaskWithJobKey {
r := b.Config.Resources
result := make([]mutator.TaskWithJobKey, 0)
for k := range b.Config.Resources.Jobs {
tasks := r.Jobs[k].JobSettings.Tasks
for i := range tasks {
task := &tasks[i]
// Keep only Python wheel tasks
if task.PythonWheelTask == nil {
continue
}
result = append(result, mutator.TaskWithJobKey{
JobKey: k,
Task: task,
})
}
}
return result
}
func (t *pythonTrampoline) GetTemplateData(task *jobs.Task) (map[string]any, error) {
params, err := t.generateParameters(task.PythonWheelTask)
if err != nil {
return nil, err
}
@@ -64,7 +99,7 @@ func generateTemplateData(task *jobs.Task) (map[string]any, error) {
return data, nil
}
func generateParameters(task *jobs.PythonWheelTask) (string, error) {
func (t *pythonTrampoline) generateParameters(task *jobs.PythonWheelTask) (string, error) {
if task.Parameters != nil && task.NamedParameters != nil {
return "", fmt.Errorf("not allowed to pass both paramaters and named_parameters")
}
@@ -78,8 +113,3 @@ func generateParameters(task *jobs.PythonWheelTask) (string, error) {
}
return strings.Join(params, ", "), nil
}
func cleanUpTask(task *jobs.Task) {
task.PythonWheelTask = nil
task.Libraries = nil
}

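To see the trampoline end to end, here is a hedged sketch that renders a trimmed NOTEBOOK_TEMPLATE-style snippet with the same data keys GetTemplateData produces; the wheel path, package name, entry point, and parameters are invented for illustration.

package main

import (
	"os"
	"text/template"

	"github.com/databricks/databricks-sdk-go/service/jobs"
)

// A trimmed stand-in for NOTEBOOK_TEMPLATE using the same placeholders.
const snippet = `# Databricks notebook source
%pip install --force-reinstall {{.Whl}}
sys.argv = [{{.Params}}]
entry = [ep for ep in metadata.distribution("{{.Task.PackageName}}").entry_points if ep.name == "{{.Task.EntryPoint}}"]
`

func main() {
	tmpl := template.Must(template.New("notebook").Parse(snippet))
	data := map[string]any{
		"Whl":    "/Workspace/.internal/my_project-0.1.0-py3-none-any.whl", // hypothetical
		"Params": `"python", "one", "two"`,
		"Task": &jobs.PythonWheelTask{
			PackageName: "my_project",
			EntryPoint:  "main",
		},
	}
	if err := tmpl.Execute(os.Stdout, data); err != nil {
		panic(err)
	}
}

The rendered notebook sets sys.argv for the entry point and resolves it through importlib.metadata, which is what lets the generated wrapper replace the deprecated pkg_resources.load_entry_point call on older DBR runtimes.
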
View File

@@ -1,8 +1,13 @@
package python
import (
"context"
"strings"
"testing"
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config"
"github.com/databricks/cli/bundle/config/resources"
"github.com/databricks/databricks-sdk-go/service/jobs"
"github.com/stretchr/testify/require"
)
@@ -11,9 +16,9 @@ type testCase struct {
Actual []string
Expected string
}
type NamedParams map[string]string
type testCaseNamed struct {
Actual NamedParams
Actual map[string]string
Expected string
}
@@ -26,32 +31,103 @@ var paramsTestCases []testCase = []testCase{
}
var paramsTestCasesNamed []testCaseNamed = []testCaseNamed{
{NamedParams{}, `"python"`},
{NamedParams{"a": "1"}, `"python", "a=1"`},
{NamedParams{"a": "1", "b": "2"}, `"python", "a=1", "b=2"`},
{NamedParams{"data": `{"a": 1}`}, `"python", "data={\"a\": 1}"`},
{map[string]string{}, `"python"`},
{map[string]string{"a": "1"}, `"python", "a=1"`},
{map[string]string{"a": "'1'"}, `"python", "a='1'"`},
{map[string]string{"a": `"1"`}, `"python", "a=\"1\""`},
{map[string]string{"a": "1", "b": "2"}, `"python", "a=1", "b=2"`},
{map[string]string{"data": `{"a": 1}`}, `"python", "data={\"a\": 1}"`},
}
func TestGenerateParameters(t *testing.T) {
trampoline := pythonTrampoline{}
for _, c := range paramsTestCases {
task := &jobs.PythonWheelTask{Parameters: c.Actual}
result, err := generateParameters(task)
result, err := trampoline.generateParameters(task)
require.NoError(t, err)
require.Equal(t, c.Expected, result)
}
}
func TestGenerateNamedParameters(t *testing.T) {
trampoline := pythonTrampoline{}
for _, c := range paramsTestCasesNamed {
task := &jobs.PythonWheelTask{NamedParameters: c.Actual}
result, err := generateParameters(task)
result, err := trampoline.generateParameters(task)
require.NoError(t, err)
require.Equal(t, c.Expected, result)
// parameter order can be non-deterministic, so just check that they exist as expected
require.ElementsMatch(t, strings.Split(c.Expected, ","), strings.Split(result, ","))
}
}
func TestGenerateBoth(t *testing.T) {
trampoline := pythonTrampoline{}
task := &jobs.PythonWheelTask{NamedParameters: map[string]string{"a": "1"}, Parameters: []string{"b"}}
_, err := generateParameters(task)
_, err := trampoline.generateParameters(task)
require.Error(t, err)
require.ErrorContains(t, err, "not allowed to pass both parameters and named_parameters")
}
func TestTransformFiltersWheelTasksOnly(t *testing.T) {
trampoline := pythonTrampoline{}
bundle := &bundle.Bundle{
Config: config.Root{
Resources: config.Resources{
Jobs: map[string]*resources.Job{
"job1": {
JobSettings: &jobs.JobSettings{
Tasks: []jobs.Task{
{
TaskKey: "key1",
PythonWheelTask: &jobs.PythonWheelTask{},
},
{
TaskKey: "key2",
NotebookTask: &jobs.NotebookTask{},
},
},
},
},
},
},
},
}
tasks := trampoline.GetTasks(bundle)
require.Len(t, tasks, 1)
require.Equal(t, "job1", tasks[0].JobKey)
require.Equal(t, "key1", tasks[0].Task.TaskKey)
require.NotNil(t, tasks[0].Task.PythonWheelTask)
}
func TestNoPanicWithNoPythonWheelTasks(t *testing.T) {
tmpDir := t.TempDir()
b := &bundle.Bundle{
Config: config.Root{
Path: tmpDir,
Bundle: config.Bundle{
Target: "development",
},
Resources: config.Resources{
Jobs: map[string]*resources.Job{
"test": {
Paths: resources.Paths{
ConfigFilePath: tmpDir,
},
JobSettings: &jobs.JobSettings{
Tasks: []jobs.Task{
{
TaskKey: "notebook_task",
NotebookTask: &jobs.NotebookTask{}},
},
},
},
},
},
},
}
trampoline := TransformWheelTask()
err := bundle.Apply(context.Background(), b, trampoline)
require.NoError(t, err)
}

View File

@@ -43,7 +43,7 @@ func getTarget(cmd *cobra.Command) (value string) {
return target
}
func GetProfile(cmd *cobra.Command) (value string) {
func getProfile(cmd *cobra.Command) (value string) {
// The command line flag takes precedence.
flag := cmd.Flag("profile")
if flag != nil {
@@ -70,7 +70,7 @@ func loadBundle(cmd *cobra.Command, args []string, load func(ctx context.Context
return nil, nil
}
profile := GetProfile(cmd)
profile := getProfile(cmd)
if profile != "" {
b.Config.Workspace.Profile = profile
}

View File

@@ -4,13 +4,17 @@ import (
"context"
"github.com/databricks/databricks-sdk-go"
"github.com/databricks/databricks-sdk-go/apierr"
)
// Determines whether a given user id is a service principal.
// This function uses a heuristic: if no user exists with this id, we assume
// it's a service principal. Unfortunately, the standard service principal API is too
// slow for our purposes.
func IsServicePrincipal(ctx context.Context, ws *databricks.WorkspaceClient, userId string) bool {
func IsServicePrincipal(ctx context.Context, ws *databricks.WorkspaceClient, userId string) (bool, error) {
_, err := ws.Users.GetById(ctx, userId)
return err != nil
if apierr.IsMissing(err) {
return true, nil
}
return false, err
}

View File

@@ -104,7 +104,10 @@ func loadHelpers(ctx context.Context) template.FuncMap {
return false, err
}
}
result := auth.IsServicePrincipal(ctx, w, user.Id)
result, err := auth.IsServicePrincipal(ctx, w, user.Id)
if err != nil {
return false, err
}
is_service_principal = &result
return result, nil
},

View File

@@ -7,6 +7,8 @@ import (
"os"
"path"
"path/filepath"
"github.com/databricks/cli/libs/cmdio"
)
const libraryDirName = "library"
@@ -80,7 +82,7 @@ func Materialize(ctx context.Context, configFilePath, templateRoot, outputDir st
if err != nil {
return err
}
println("✨ Successfully initialized template")
cmdio.LogString(ctx, "✨ Successfully initialized template")
return nil
}