2023-08-30 12:21:39 +00:00
|
|
|
package python
|
|
|
|
|
|
|
|
import (
	"context"
	"fmt"
	"sort"
	"strconv"
	"strings"

	"github.com/databricks/cli/bundle"
	"github.com/databricks/cli/bundle/config/mutator"
	"github.com/databricks/cli/bundle/libraries"
	"github.com/databricks/databricks-sdk-go/service/compute"
	"github.com/databricks/databricks-sdk-go/service/jobs"
)
|
|
|
|
|
|
|
|
// NOTEBOOK_TEMPLATE is the Go template for the Python notebook that replaces a
// Python wheel task. It is rendered with the data returned by
// pythonTrampoline.GetTemplateData and reads these fields:
//
//   - .Libraries: the wheel libraries; each one's .Whl is installed with %pip.
//   - .Params: the already-quoted argument list that seeds sys.argv.
//   - .Task.PackageName / .Task.EntryPoint: the distribution and the entry
//     point name to look up and call.
//
// The notebook installs the wheels, restarts Python, optionally replaces the
// arguments after sys.argv[0] with the JSON list found in the
// "__python_params" widget, runs the entry point with stdout captured, and
// hands the captured output to dbutils.notebook.exit.
//
// The Python blocks are indented with tabs; the indentation is significant.
const NOTEBOOK_TEMPLATE = `# Databricks notebook source
%python
{{range .Libraries}}
%pip install --force-reinstall {{.Whl}}
{{end}}

dbutils.library.restartPython()

try:
	from importlib import metadata
except ImportError: # for Python<3.8
	import subprocess
	import sys

	subprocess.check_call([sys.executable, "-m", "pip", "install", "importlib-metadata"])
	import importlib_metadata as metadata

from contextlib import redirect_stdout
import io
import sys
import json

params = []
try:
	python_params = dbutils.widgets.get("__python_params")
	if python_params:
		params = json.loads(python_params)
except Exception as e:
	print(e)

sys.argv = [{{.Params}}]

if params:
	sys.argv = [sys.argv[0]] + params

entry = [ep for ep in metadata.distribution("{{.Task.PackageName}}").entry_points if ep.name == "{{.Task.EntryPoint}}"]

f = io.StringIO()
with redirect_stdout(f):
	if entry:
		entry[0].load()()
	else:
		raise ImportError("Entry point '{{.Task.EntryPoint}}' not found")
s = f.getvalue()
dbutils.notebook.exit(s)
`
|
|
|
|
|
|
|
|
// This mutator takes the wheel task and transforms it into notebook
|
|
|
|
// which installs uploaded wheels using %pip and then calling corresponding
|
|
|
|
// entry point.
|
|
|
|
func TransformWheelTask() bundle.Mutator {
|
2024-07-09 15:08:38 +00:00
|
|
|
return bundle.If(
|
|
|
|
func(_ context.Context, b *bundle.Bundle) (bool, error) {
|
|
|
|
res := b.Config.Experimental != nil && b.Config.Experimental.PythonWheelWrapper
|
|
|
|
return res, nil
|
2023-09-26 14:32:20 +00:00
|
|
|
},
|
|
|
|
mutator.NewTrampoline(
|
|
|
|
"python_wheel",
|
|
|
|
&pythonTrampoline{},
|
|
|
|
NOTEBOOK_TEMPLATE,
|
|
|
|
),
|
|
|
|
mutator.NoOp(),
|
2023-08-30 12:21:39 +00:00
|
|
|
)
|
|
|
|
}
|
|
|
|
|
|
|
|
// pythonTrampoline is the stateless value that TransformWheelTask hands to
// mutator.NewTrampoline. Its methods (CleanUp, GetTasks, GetTemplateData)
// supply the Python-wheel-specific steps of the transformation.
type pythonTrampoline struct{}
|
|
|
|
|
|
|
|
func (t *pythonTrampoline) CleanUp(task *jobs.Task) error {
|
|
|
|
task.PythonWheelTask = nil
|
2024-03-04 12:34:03 +00:00
|
|
|
|
|
|
|
nonWheelLibraries := make([]compute.Library, 0)
|
|
|
|
for _, l := range task.Libraries {
|
|
|
|
if l.Whl == "" {
|
|
|
|
nonWheelLibraries = append(nonWheelLibraries, l)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
task.Libraries = nonWheelLibraries
|
2023-08-30 12:21:39 +00:00
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (t *pythonTrampoline) GetTasks(b *bundle.Bundle) []mutator.TaskWithJobKey {
|
|
|
|
r := b.Config.Resources
|
|
|
|
result := make([]mutator.TaskWithJobKey, 0)
|
|
|
|
for k := range b.Config.Resources.Jobs {
|
|
|
|
tasks := r.Jobs[k].JobSettings.Tasks
|
|
|
|
for i := range tasks {
|
|
|
|
task := &tasks[i]
|
2023-08-30 13:51:15 +00:00
|
|
|
|
2023-09-08 13:45:21 +00:00
|
|
|
// Keep only Python wheel tasks with workspace libraries referenced.
|
|
|
|
// At this point of moment we don't have local paths in Libraries sections anymore
|
|
|
|
// Local paths have been replaced with the remote when the artifacts where uploaded
|
|
|
|
// in artifacts.UploadAll mutator.
|
2024-04-22 11:44:34 +00:00
|
|
|
if task.PythonWheelTask == nil || !needsTrampoline(*task) {
|
2023-08-30 13:51:15 +00:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2023-08-30 12:21:39 +00:00
|
|
|
result = append(result, mutator.TaskWithJobKey{
|
|
|
|
JobKey: k,
|
|
|
|
Task: task,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return result
|
|
|
|
}
|
|
|
|
|
2024-04-22 11:44:34 +00:00
|
|
|
func needsTrampoline(task jobs.Task) bool {
|
2023-09-08 13:45:21 +00:00
|
|
|
return libraries.IsTaskWithWorkspaceLibraries(task)
|
|
|
|
}
|
|
|
|
|
2023-08-30 12:21:39 +00:00
|
|
|
func (t *pythonTrampoline) GetTemplateData(task *jobs.Task) (map[string]any, error) {
|
|
|
|
params, err := t.generateParameters(task.PythonWheelTask)
|
2024-03-04 12:34:03 +00:00
|
|
|
whlLibraries := make([]compute.Library, 0)
|
|
|
|
for _, l := range task.Libraries {
|
|
|
|
if l.Whl != "" {
|
|
|
|
whlLibraries = append(whlLibraries, l)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-08-30 12:21:39 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
data := map[string]any{
|
2024-03-04 12:34:03 +00:00
|
|
|
"Libraries": whlLibraries,
|
2023-08-30 12:21:39 +00:00
|
|
|
"Params": params,
|
|
|
|
"Task": task.PythonWheelTask,
|
|
|
|
}
|
|
|
|
|
|
|
|
return data, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (t *pythonTrampoline) generateParameters(task *jobs.PythonWheelTask) (string, error) {
|
|
|
|
if task.Parameters != nil && task.NamedParameters != nil {
|
|
|
|
return "", fmt.Errorf("not allowed to pass both paramaters and named_parameters")
|
|
|
|
}
|
2023-09-26 14:32:20 +00:00
|
|
|
params := append([]string{task.PackageName}, task.Parameters...)
|
2023-08-30 12:21:39 +00:00
|
|
|
for k, v := range task.NamedParameters {
|
|
|
|
params = append(params, fmt.Sprintf("%s=%s", k, v))
|
|
|
|
}
|
|
|
|
|
|
|
|
for i := range params {
|
|
|
|
params[i] = strconv.Quote(params[i])
|
|
|
|
}
|
|
|
|
return strings.Join(params, ", "), nil
|
|
|
|
}
|