databricks-cli/bundle/config/mutator/trampoline.go

101 lines
2.0 KiB
Go
Raw Permalink Normal View History

Added transformation mutator for Python wheel task for them to work on DBR <13.1 (#635) ## Changes ***Note: this PR relies on sync.include functionality from here: https://github.com/databricks/cli/pull/671*** Added transformation mutator for Python wheel task for them to work on DBR <13.1 Using wheels upload to Workspace file system as cluster libraries is not supported in DBR < 13.1 In order to make Python wheel work correctly on DBR < 13.1 we do the following: 1. Build and upload python wheel as usual 2. Transform python wheel task into special notebook task which does the following a. Installs all necessary wheels with %pip magic b. Executes defined entry point with all provided parameters 3. Upload this notebook file to workspace file system 4. Deploy transformed job task This is also beneficial for executing on existing clusters because this notebook always reinstall wheels so if there are any changes to the wheel package, they are correctly picked up ## Tests bundle.yml ```yaml bundle: name: wheel-task workspace: host: **** resources: jobs: test_job: name: "[${bundle.environment}] My Wheel Job" tasks: - task_key: TestTask existing_cluster_id: "***" python_wheel_task: package_name: "my_test_code" entry_point: "run" parameters: ["first argument","first value","second argument","second value"] libraries: - whl: ./dist/*.whl ``` Output ``` andrew.nester@HFW9Y94129 wheel % databricks bundle run test_job Run URL: *** 2023-08-03 15:58:04 "[default] My Wheel Job" TERMINATED SUCCESS Output: ======= Task TestTask: Hello from my func Got arguments v1: ['python', 'first argument', 'first value', 'second argument', 'second value'] ```
2023-08-30 12:21:39 +00:00
package mutator
import (
"context"
"fmt"
"os"
"path"
"path/filepath"
"text/template"
"github.com/databricks/cli/bundle"
"github.com/databricks/databricks-sdk-go/service/jobs"
)
type TaskWithJobKey struct {
Task *jobs.Task
JobKey string
}
type TrampolineFunctions interface {
GetTemplateData(task *jobs.Task) (map[string]any, error)
GetTasks(b *bundle.Bundle) []TaskWithJobKey
CleanUp(task *jobs.Task) error
}
type trampoline struct {
name string
functions TrampolineFunctions
template string
}
func NewTrampoline(
name string,
functions TrampolineFunctions,
template string,
) *trampoline {
return &trampoline{name, functions, template}
}
func (m *trampoline) Name() string {
return fmt.Sprintf("trampoline(%s)", m.name)
}
func (m *trampoline) Apply(ctx context.Context, b *bundle.Bundle) error {
tasks := m.functions.GetTasks(b)
for _, task := range tasks {
err := m.generateNotebookWrapper(ctx, b, task)
Added transformation mutator for Python wheel task for them to work on DBR <13.1 (#635) ## Changes ***Note: this PR relies on sync.include functionality from here: https://github.com/databricks/cli/pull/671*** Added transformation mutator for Python wheel task for them to work on DBR <13.1 Using wheels upload to Workspace file system as cluster libraries is not supported in DBR < 13.1 In order to make Python wheel work correctly on DBR < 13.1 we do the following: 1. Build and upload python wheel as usual 2. Transform python wheel task into special notebook task which does the following a. Installs all necessary wheels with %pip magic b. Executes defined entry point with all provided parameters 3. Upload this notebook file to workspace file system 4. Deploy transformed job task This is also beneficial for executing on existing clusters because this notebook always reinstall wheels so if there are any changes to the wheel package, they are correctly picked up ## Tests bundle.yml ```yaml bundle: name: wheel-task workspace: host: **** resources: jobs: test_job: name: "[${bundle.environment}] My Wheel Job" tasks: - task_key: TestTask existing_cluster_id: "***" python_wheel_task: package_name: "my_test_code" entry_point: "run" parameters: ["first argument","first value","second argument","second value"] libraries: - whl: ./dist/*.whl ``` Output ``` andrew.nester@HFW9Y94129 wheel % databricks bundle run test_job Run URL: *** 2023-08-03 15:58:04 "[default] My Wheel Job" TERMINATED SUCCESS Output: ======= Task TestTask: Hello from my func Got arguments v1: ['python', 'first argument', 'first value', 'second argument', 'second value'] ```
2023-08-30 12:21:39 +00:00
if err != nil {
return err
}
}
return nil
}
func (m *trampoline) generateNotebookWrapper(ctx context.Context, b *bundle.Bundle, task TaskWithJobKey) error {
internalDir, err := b.InternalDir(ctx)
Added transformation mutator for Python wheel task for them to work on DBR <13.1 (#635) ## Changes ***Note: this PR relies on sync.include functionality from here: https://github.com/databricks/cli/pull/671*** Added transformation mutator for Python wheel task for them to work on DBR <13.1 Using wheels upload to Workspace file system as cluster libraries is not supported in DBR < 13.1 In order to make Python wheel work correctly on DBR < 13.1 we do the following: 1. Build and upload python wheel as usual 2. Transform python wheel task into special notebook task which does the following a. Installs all necessary wheels with %pip magic b. Executes defined entry point with all provided parameters 3. Upload this notebook file to workspace file system 4. Deploy transformed job task This is also beneficial for executing on existing clusters because this notebook always reinstall wheels so if there are any changes to the wheel package, they are correctly picked up ## Tests bundle.yml ```yaml bundle: name: wheel-task workspace: host: **** resources: jobs: test_job: name: "[${bundle.environment}] My Wheel Job" tasks: - task_key: TestTask existing_cluster_id: "***" python_wheel_task: package_name: "my_test_code" entry_point: "run" parameters: ["first argument","first value","second argument","second value"] libraries: - whl: ./dist/*.whl ``` Output ``` andrew.nester@HFW9Y94129 wheel % databricks bundle run test_job Run URL: *** 2023-08-03 15:58:04 "[default] My Wheel Job" TERMINATED SUCCESS Output: ======= Task TestTask: Hello from my func Got arguments v1: ['python', 'first argument', 'first value', 'second argument', 'second value'] ```
2023-08-30 12:21:39 +00:00
if err != nil {
return err
}
notebookName := fmt.Sprintf("notebook_%s_%s", task.JobKey, task.Task.TaskKey)
localNotebookPath := filepath.Join(internalDir, notebookName+".py")
err = os.MkdirAll(filepath.Dir(localNotebookPath), 0755)
if err != nil {
return err
}
f, err := os.Create(localNotebookPath)
if err != nil {
return err
}
defer f.Close()
data, err := m.functions.GetTemplateData(task.Task)
if err != nil {
return err
}
t, err := template.New(notebookName).Parse(m.template)
if err != nil {
return err
}
internalDirRel, err := filepath.Rel(b.Config.Path, internalDir)
if err != nil {
return err
}
err = m.functions.CleanUp(task.Task)
if err != nil {
return err
}
remotePath := path.Join(b.Config.Workspace.FilesPath, filepath.ToSlash(internalDirRel), notebookName)
task.Task.NotebookTask = &jobs.NotebookTask{
NotebookPath: remotePath,
}
return t.Execute(f, data)
}