databricks-cli/bundle/python/notebook_task_wrappers.go

134 lines
3.2 KiB
Go
Raw Normal View History

2023-08-31 12:04:04 +00:00
package python
import (
2023-09-05 11:39:47 +00:00
_ "embed"
2023-08-31 12:04:04 +00:00
"encoding/json"
"fmt"
"os"
2023-09-05 11:39:47 +00:00
"path/filepath"
2023-08-31 12:04:04 +00:00
"strings"
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config/mutator"
2023-09-13 10:50:29 +00:00
jobs_utils "github.com/databricks/cli/libs/jobs"
2023-08-31 12:04:04 +00:00
"github.com/databricks/databricks-sdk-go/service/jobs"
)
2023-09-05 11:39:47 +00:00
//go:embed trampoline_data/notebook.py
2023-08-31 12:04:04 +00:00
var notebookTrampolineData string
2023-09-05 11:39:47 +00:00
//go:embed trampoline_data/python.py
2023-08-31 12:04:04 +00:00
var pyTrampolineData string
// TransforNotebookTask returns the bundle mutator produced by
// mutator.NewTrampoline for notebook tasks. The trampoline behaviour itself
// (task selection, template and template data) is supplied by a
// notebookTrampoline value.
//
// NOTE(review): "python_notebook" is passed as NewTrampoline's first argument,
// presumably the mutator's name — confirm against mutator.NewTrampoline.
func TransforNotebookTask() bundle.Mutator {
	trampoline := &notebookTrampoline{}
	return mutator.NewTrampoline("python_notebook", trampoline)
}
// notebookTrampoline is a stateless type whose methods (GetTasks, CleanUp,
// GetTemplate, GetTemplateData) are handed to mutator.NewTrampoline by
// TransforNotebookTask to drive trampoline generation for notebook tasks.
type notebookTrampoline struct{}
2023-09-05 11:39:47 +00:00
func localNotebookPath(b *bundle.Bundle, task *jobs.Task) (string, error) {
remotePath := task.NotebookTask.NotebookPath
relRemotePath, err := filepath.Rel(b.Config.Workspace.FilesPath, remotePath)
if err != nil {
return "", err
}
localPath := filepath.Join(b.Config.Path, filepath.FromSlash(relRemotePath))
_, err = os.Stat(fmt.Sprintf("%s.ipynb", localPath))
if err == nil {
return fmt.Sprintf("%s.ipynb", localPath), nil
}
_, err = os.Stat(fmt.Sprintf("%s.py", localPath))
if err == nil {
return fmt.Sprintf("%s.py", localPath), nil
}
2023-09-13 10:50:29 +00:00
2023-09-05 11:39:47 +00:00
return "", fmt.Errorf("notebook %s not found", localPath)
}
2023-09-13 10:50:29 +00:00
func (n *notebookTrampoline) GetTasks(b *bundle.Bundle) []jobs_utils.TaskWithJobKey {
return jobs_utils.GetTasksWithJobKeyBy(b, func(task *jobs.Task) bool {
2023-09-05 11:39:47 +00:00
if task.NotebookTask == nil ||
task.NotebookTask.Source == jobs.SourceGit {
return false
}
2023-09-13 10:50:29 +00:00
_, err := localNotebookPath(b, task)
// We assume if the notebook is not available locally in the bundle
// then the user has it somewhere in the workspace. For these
// out of bundle notebooks we do not want to write a trampoline.
return err == nil
2023-08-31 12:04:04 +00:00
})
}
// CleanUp is a no-op for notebook tasks: it does not modify the task and
// always returns nil.
func (n *notebookTrampoline) CleanUp(task *jobs.Task) error {
return nil
}
2023-09-05 11:39:47 +00:00
func (n *notebookTrampoline) GetTemplate(b *bundle.Bundle, task *jobs.Task) (string, error) {
localPath, err := localNotebookPath(b, task)
if err != nil {
return "", err
2023-08-31 12:04:04 +00:00
}
2023-09-05 11:39:47 +00:00
bytesData, err := os.ReadFile(localPath)
2023-08-31 12:04:04 +00:00
if err != nil {
return "", err
}
s := strings.TrimSpace(string(bytesData))
2023-09-05 11:39:47 +00:00
if strings.HasSuffix(localPath, ".ipynb") {
2023-08-31 12:04:04 +00:00
return getIpynbTemplate(s)
}
lines := strings.Split(s, "\n")
if strings.HasPrefix(lines[0], "# Databricks notebook source") {
2023-09-13 10:50:29 +00:00
return getDbnbTemplate(s)
2023-08-31 12:04:04 +00:00
}
2023-09-13 10:50:29 +00:00
return pyTrampolineData, nil
2023-08-31 12:04:04 +00:00
}
// getDbnbTemplate builds a Databricks source-format notebook in which
// notebookTrampolineData comes first, followed by the original notebook body.
//
// Surrounding whitespace and a leading "# Databricks notebook source" marker
// are stripped from s; the marker is then written back once at the top of the
// result. The returned error is always nil.
//
// NOTE(review): Databricks' exported source format conventionally separates
// cells with "# COMMAND ----------" (upper case) — confirm the mixed-case
// delimiter emitted here is recognized as a cell boundary.
func getDbnbTemplate(s string) (string, error) {
	const header = "# Databricks notebook source"

	body := strings.TrimSpace(s)
	body = strings.TrimPrefix(body, header)
	body = strings.TrimSpace(body)

	return fmt.Sprintf(`# Databricks notebook source
%s
# Command ----------
%s
`, notebookTrampolineData, body), nil
}
// getIpynbTemplate returns the Jupyter notebook JSON document s with one extra
// code cell, whose source is notebookTrampolineData, inserted in front of the
// existing cells.
//
// A missing or null "cells" entry is treated as an empty cell list. All other
// top-level keys are carried over unchanged; the result is re-serialized with
// json.Marshal.
//
// An error is returned when s is not valid JSON, when it does not decode to a
// JSON object (for example the literal null), or when "cells" is present but
// is not a list. The last two cases are reported as errors rather than being
// allowed to panic.
func getIpynbTemplate(s string) (string, error) {
	var data map[string]any
	if err := json.Unmarshal([]byte(s), &data); err != nil {
		return "", err
	}
	// Unmarshalling the JSON literal null succeeds but leaves the map nil;
	// writing to it below would panic.
	if data == nil {
		return "", fmt.Errorf("notebook is not a JSON object")
	}

	var cells []any
	if raw := data["cells"]; raw != nil {
		list, ok := raw.([]any)
		if !ok {
			return "", fmt.Errorf("notebook \"cells\" must be a list, got %T", raw)
		}
		cells = list
	}

	trampolineCell := map[string]any{
		"cell_type": "code",
		"source":    []string{notebookTrampolineData},
	}
	// Build a fresh slice so the trampoline cell comes first.
	data["cells"] = append([]any{trampolineCell}, cells...)

	out, err := json.Marshal(data)
	if err != nil {
		return "", err
	}
	return string(out), nil
}
// GetTemplateData returns the values associated with a notebook task's
// template: "ProjectRoot" is the workspace files path of the bundle and
// "SourceFile" is the task's notebook path. The returned error is always nil.
//
// The caller must ensure task.NotebookTask is non-nil.
//
// NOTE(review): these keys are presumably substituted into the template
// returned by GetTemplate — confirm against mutator.NewTrampoline.
func (n *notebookTrampoline) GetTemplateData(b *bundle.Bundle, task *jobs.Task) (map[string]any, error) {
	data := make(map[string]any, 2)
	data["ProjectRoot"] = b.Config.Workspace.FilesPath
	data["SourceFile"] = task.NotebookTask.NotebookPath
	return data, nil
}