From aeb77d94c3d35eb8ccf1b50bb62cce091f687032 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Fri, 18 Aug 2023 11:23:45 +0200 Subject: [PATCH] more reusable code --- bundle/config/mutator/trampoline.go | 95 ++++++++++++++++++ bundle/config/mutator/trampoline_test.go | 90 +++++++++++++++++ bundle/python/transform.go | 119 +++++------------------ bundle/python/transform_test.go | 2 + 4 files changed, 211 insertions(+), 95 deletions(-) create mode 100644 bundle/config/mutator/trampoline.go create mode 100644 bundle/config/mutator/trampoline_test.go diff --git a/bundle/config/mutator/trampoline.go b/bundle/config/mutator/trampoline.go new file mode 100644 index 00000000..6265c28d --- /dev/null +++ b/bundle/config/mutator/trampoline.go @@ -0,0 +1,95 @@ +package mutator + +import ( + "context" + "fmt" + "os" + "path" + "path/filepath" + "text/template" + + "github.com/databricks/cli/bundle" + "github.com/databricks/databricks-sdk-go/service/jobs" +) + +type fnTemplateData func(task *jobs.Task) (map[string]any, error) +type fnCleanUp func(task *jobs.Task) +type fnTasks func(b *bundle.Bundle) []*jobs.Task + +type trampoline struct { + name string + getTasks fnTasks + templateData fnTemplateData + cleanUp fnCleanUp + template string +} + +func NewTrampoline( + name string, + tasks fnTasks, + templateData fnTemplateData, + cleanUp fnCleanUp, + template string, +) *trampoline { + return &trampoline{name, tasks, templateData, cleanUp, template} +} + +func (m *trampoline) Name() string { + return fmt.Sprintf("trampoline(%s)", m.name) +} + +func (m *trampoline) Apply(ctx context.Context, b *bundle.Bundle) error { + tasks := m.getTasks(b) + for _, task := range tasks { + err := m.generateNotebookWrapper(b, task) + if err != nil { + return err + } + } + return nil +} + +func (m *trampoline) generateNotebookWrapper(b *bundle.Bundle, task *jobs.Task) error { + internalDir, err := b.InternalDir() + if err != nil { + return err + } + + notebookName := fmt.Sprintf("notebook_%s", task.TaskKey) + localNotebookPath := filepath.Join(internalDir, notebookName+".py") + + err = os.MkdirAll(filepath.Dir(localNotebookPath), 0755) + if err != nil { + return err + } + + f, err := os.Create(localNotebookPath) + if err != nil { + return err + } + defer f.Close() + + data, err := m.templateData(task) + if err != nil { + return err + } + + t, err := template.New(notebookName).Parse(m.template) + if err != nil { + return err + } + + internalDirRel, err := filepath.Rel(b.Config.Path, internalDir) + if err != nil { + return err + } + + m.cleanUp(task) + remotePath := path.Join(b.Config.Workspace.FilesPath, filepath.ToSlash(internalDirRel), notebookName) + + task.NotebookTask = &jobs.NotebookTask{ + NotebookPath: remotePath, + } + + return t.Execute(f, data) +} diff --git a/bundle/config/mutator/trampoline_test.go b/bundle/config/mutator/trampoline_test.go new file mode 100644 index 00000000..9c176ff0 --- /dev/null +++ b/bundle/config/mutator/trampoline_test.go @@ -0,0 +1,90 @@ +package mutator + +import ( + "context" + "fmt" + "os" + "path/filepath" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/databricks-sdk-go/service/jobs" + "github.com/stretchr/testify/require" +) + +func getTasks(b *bundle.Bundle) []*jobs.Task { + tasks := make([]*jobs.Task, 0) + for k := range b.Config.Resources.Jobs["test"].Tasks { + tasks = append(tasks, &b.Config.Resources.Jobs["test"].Tasks[k]) + } + + return tasks +} + +func templateData(task *jobs.Task) (map[string]any, error) { + if task.PythonWheelTask == nil { + return nil, fmt.Errorf("PythonWheelTask cannot be nil") + } + + data := make(map[string]any) + data["MyName"] = "Trampoline" + return data, nil +} + +func cleanUp(task *jobs.Task) { + task.PythonWheelTask = nil +} + +func TestGenerateTrampoline(t *testing.T) { + tmpDir := t.TempDir() + + tasks := []jobs.Task{ + { + TaskKey: "to_trampoline", + PythonWheelTask: &jobs.PythonWheelTask{ + PackageName: "test", + EntryPoint: "run", + }}, + } + + b := &bundle.Bundle{ + Config: config.Root{ + Path: tmpDir, + Bundle: config.Bundle{ + Target: "development", + }, + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "test": { + Paths: resources.Paths{ + ConfigFilePath: tmpDir, + }, + JobSettings: &jobs.JobSettings{ + Tasks: tasks, + }, + }, + }, + }, + }, + } + ctx := context.Background() + + trampoline := NewTrampoline("test_trampoline", getTasks, templateData, cleanUp, "Hello from {{.MyName}}") + err := bundle.Apply(ctx, b, trampoline) + require.NoError(t, err) + + dir, err := b.InternalDir() + require.NoError(t, err) + filename := filepath.Join(dir, "notebook_to_trampoline.py") + + bytes, err := os.ReadFile(filename) + require.NoError(t, err) + + require.Equal(t, "Hello from Trampoline", string(bytes)) + + task := b.Config.Resources.Jobs["test"].Tasks[0] + require.Equal(t, task.NotebookTask.NotebookPath, ".databricks/bundle/development/.internal/notebook_to_trampoline") + require.Nil(t, task.PythonWheelTask) +} diff --git a/bundle/python/transform.go b/bundle/python/transform.go index cd7c826f..c7938bab 100644 --- a/bundle/python/transform.go +++ b/bundle/python/transform.go @@ -1,17 +1,13 @@ package python import ( - "context" "fmt" - "os" - "path" - "path/filepath" + "strconv" "strings" - "text/template" "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config/mutator" "github.com/databricks/cli/bundle/libraries" - "github.com/databricks/databricks-sdk-go/service/compute" "github.com/databricks/databricks-sdk-go/service/jobs" ) @@ -40,105 +36,32 @@ dbutils.notebook.exit(s) // which installs uploaded wheels using %pip and then calling corresponding // entry point. func TransformWheelTask() bundle.Mutator { - return &transform{} + return mutator.NewTrampoline( + "python_wheel", + getTasks, + generateTemplateData, + cleanUpTask, + NOTEBOOK_TEMPLATE, + ) } -type transform struct { +func getTasks(b *bundle.Bundle) []*jobs.Task { + return libraries.FindAllWheelTasks(b) } -func (m *transform) Name() string { - return "python.TransformWheelTask" -} - -func (m *transform) Apply(ctx context.Context, b *bundle.Bundle) error { - wheelTasks := libraries.FindAllWheelTasks(b) - for _, wheelTask := range wheelTasks { - err := generateNotebookTrampoline(b, wheelTask) - if err != nil { - return err - } - } - return nil -} - -func generateNotebookTrampoline(b *bundle.Bundle, wheelTask *jobs.Task) error { - taskDefinition := wheelTask.PythonWheelTask - libraries := wheelTask.Libraries - - wheelTask.PythonWheelTask = nil - wheelTask.Libraries = nil - - filename, err := generateNotebookWrapper(b, taskDefinition, libraries) +func generateTemplateData(task *jobs.Task) (map[string]any, error) { + params, err := generateParameters(task.PythonWheelTask) if err != nil { - return err - } - - internalDir, err := getInternalDir(b) - if err != nil { - return err - } - - internalDirRel, err := filepath.Rel(b.Config.Path, internalDir) - if err != nil { - return err - } - - parts := []string{b.Config.Workspace.FilesPath} - parts = append(parts, strings.Split(internalDirRel, string(os.PathSeparator))...) - parts = append(parts, filename) - - wheelTask.NotebookTask = &jobs.NotebookTask{ - NotebookPath: path.Join(parts...), - } - - return nil -} - -func getInternalDir(b *bundle.Bundle) (string, error) { - cacheDir, err := b.CacheDir() - if err != nil { - return "", err - } - internalDir := filepath.Join(cacheDir, ".internal") - return internalDir, nil -} - -func generateNotebookWrapper(b *bundle.Bundle, task *jobs.PythonWheelTask, libraries []compute.Library) (string, error) { - internalDir, err := getInternalDir(b) - if err != nil { - return "", err - } - - notebookName := fmt.Sprintf("notebook_%s_%s", task.PackageName, task.EntryPoint) - path := filepath.Join(internalDir, notebookName+".py") - - err = os.MkdirAll(filepath.Dir(path), 0755) - if err != nil { - return "", err - } - - f, err := os.Create(path) - if err != nil { - return "", err - } - defer f.Close() - - params, err := generateParameters(task) - if err != nil { - return "", err + return nil, err } data := map[string]any{ - "Libraries": libraries, + "Libraries": task.Libraries, "Params": params, - "Task": task, + "Task": task.PythonWheelTask, } - t, err := template.New("notebook").Parse(NOTEBOOK_TEMPLATE) - if err != nil { - return "", err - } - return notebookName, t.Execute(f, data) + return data, nil } func generateParameters(task *jobs.PythonWheelTask) (string, error) { @@ -149,8 +72,14 @@ func generateParameters(task *jobs.PythonWheelTask) (string, error) { for k, v := range task.NamedParameters { params = append(params, fmt.Sprintf("%s=%s", k, v)) } + for i := range params { - params[i] = `"` + params[i] + `"` + params[i] = strconv.Quote(params[i]) } return strings.Join(params, ", "), nil } + +func cleanUpTask(task *jobs.Task) { + task.PythonWheelTask = nil + task.Libraries = nil +} diff --git a/bundle/python/transform_test.go b/bundle/python/transform_test.go index 6b3c5f55..ada64e7a 100644 --- a/bundle/python/transform_test.go +++ b/bundle/python/transform_test.go @@ -22,12 +22,14 @@ var paramsTestCases []testCase = []testCase{ {[]string{"a"}, `"python", "a"`}, {[]string{"a", "b"}, `"python", "a", "b"`}, {[]string{"123!@#$%^&*()-="}, `"python", "123!@#$%^&*()-="`}, + {[]string{`{"a": 1}`}, `"python", "{\"a\": 1}"`}, } var paramsTestCasesNamed []testCaseNamed = []testCaseNamed{ {NamedParams{}, `"python"`}, {NamedParams{"a": "1"}, `"python", "a=1"`}, {NamedParams{"a": "1", "b": "2"}, `"python", "a=1", "b=2"`}, + {NamedParams{"data": `{"a": 1}`}, `"python", "data={\"a\": 1}"`}, } func TestGenerateParameters(t *testing.T) {