mirror of https://github.com/databricks/cli.git
Interpolate paths for job tasks that reference files (#306)
## Changes This change also swaps the order of mutators such that interpolation happens before path translation. This means that is is possible to use variables (e.g. `${bundle.environment}`) in notebook or file paths. ## Tests New tests pass and verified manually.
This commit is contained in:
parent
7427ceba6c
commit
4e4c0658db
|
@ -1,93 +0,0 @@
|
|||
package mutator
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/databricks/bricks/bundle"
|
||||
"github.com/databricks/bricks/libs/notebook"
|
||||
)
|
||||
|
||||
type translateNotebookPaths struct {
|
||||
seen map[string]string
|
||||
}
|
||||
|
||||
// TranslateNotebookPaths converts paths to local notebook files into paths in the workspace file system.
|
||||
func TranslateNotebookPaths() bundle.Mutator {
|
||||
return &translateNotebookPaths{}
|
||||
}
|
||||
|
||||
func (m *translateNotebookPaths) Name() string {
|
||||
return "TranslateNotebookPaths"
|
||||
}
|
||||
|
||||
func (m *translateNotebookPaths) rewritePath(b *bundle.Bundle, p *string) error {
|
||||
// We assume absolute paths point to a location in the workspace
|
||||
if path.IsAbs(*p) {
|
||||
return nil
|
||||
}
|
||||
relPath := path.Clean(*p)
|
||||
if interp, ok := m.seen[relPath]; ok {
|
||||
*p = interp
|
||||
return nil
|
||||
}
|
||||
|
||||
absPath := filepath.Join(b.Config.Path, relPath)
|
||||
nb, _, err := notebook.Detect(absPath)
|
||||
if os.IsNotExist(err) {
|
||||
return fmt.Errorf("notebook %s not found: %w", *p, err)
|
||||
}
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to determine if %s is a notebook: %w", relPath, err)
|
||||
}
|
||||
if !nb {
|
||||
return fmt.Errorf("file at %s is not a notebook", relPath)
|
||||
}
|
||||
|
||||
// Upon import, notebooks are stripped of their extension.
|
||||
withoutExt := strings.TrimSuffix(relPath, filepath.Ext(relPath))
|
||||
|
||||
// We have a notebook on our hands! It will be available under the file path.
|
||||
interp := fmt.Sprintf("${workspace.file_path.workspace}/%s", withoutExt)
|
||||
*p = interp
|
||||
m.seen[relPath] = interp
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *translateNotebookPaths) Apply(_ context.Context, b *bundle.Bundle) ([]bundle.Mutator, error) {
|
||||
m.seen = make(map[string]string)
|
||||
|
||||
for _, job := range b.Config.Resources.Jobs {
|
||||
for i := 0; i < len(job.Tasks); i++ {
|
||||
task := &job.Tasks[i]
|
||||
if task.NotebookTask == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
err := m.rewritePath(b, &task.NotebookTask.NotebookPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for _, pipeline := range b.Config.Resources.Pipelines {
|
||||
for i := 0; i < len(pipeline.Libraries); i++ {
|
||||
library := &pipeline.Libraries[i]
|
||||
if library.Notebook == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
err := m.rewritePath(b, &library.Notebook.Path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil, nil
|
||||
}
|
|
@ -0,0 +1,144 @@
|
|||
package mutator
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/databricks/bricks/bundle"
|
||||
"github.com/databricks/bricks/libs/notebook"
|
||||
"github.com/databricks/databricks-sdk-go/service/jobs"
|
||||
"github.com/databricks/databricks-sdk-go/service/pipelines"
|
||||
)
|
||||
|
||||
type translatePaths struct {
|
||||
seen map[string]string
|
||||
filePath string
|
||||
}
|
||||
|
||||
// TranslatePaths converts paths to local notebook files into paths in the workspace file system.
|
||||
func TranslatePaths() bundle.Mutator {
|
||||
return &translatePaths{}
|
||||
}
|
||||
|
||||
func (m *translatePaths) Name() string {
|
||||
return "TranslatePaths"
|
||||
}
|
||||
|
||||
func (m *translatePaths) rewritePath(b *bundle.Bundle, p *string, fn func(literal, relPath, absPath string) (string, error)) error {
|
||||
// We assume absolute paths point to a location in the workspace
|
||||
if path.IsAbs(*p) {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Reuse value if this path has been rewritten before.
|
||||
relPath := path.Clean(*p)
|
||||
if interp, ok := m.seen[relPath]; ok {
|
||||
*p = interp
|
||||
return nil
|
||||
}
|
||||
|
||||
// Convert local path into workspace path via specified function.
|
||||
absPath := filepath.Join(b.Config.Path, relPath)
|
||||
interp, err := fn(*p, relPath, absPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
*p = interp
|
||||
m.seen[relPath] = interp
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *translatePaths) translateNotebookPath(literal, relPath, absPath string) (string, error) {
|
||||
nb, _, err := notebook.Detect(absPath)
|
||||
if os.IsNotExist(err) {
|
||||
return "", fmt.Errorf("notebook %s not found", literal)
|
||||
}
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("unable to determine if %s is a notebook: %w", relPath, err)
|
||||
}
|
||||
if !nb {
|
||||
return "", fmt.Errorf("file at %s is not a notebook", relPath)
|
||||
}
|
||||
|
||||
// Upon import, notebooks are stripped of their extension.
|
||||
withoutExt := strings.TrimSuffix(relPath, filepath.Ext(relPath))
|
||||
|
||||
// We have a notebook on our hands! It will be available under the file path.
|
||||
return path.Join(m.filePath, withoutExt), nil
|
||||
}
|
||||
|
||||
func (m *translatePaths) translateFilePath(literal, relPath, absPath string) (string, error) {
|
||||
_, err := os.Stat(absPath)
|
||||
if os.IsNotExist(err) {
|
||||
return "", fmt.Errorf("file %s not found", literal)
|
||||
}
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("unable to access %s: %w", relPath, err)
|
||||
}
|
||||
|
||||
// The file will be available under the file path.
|
||||
return path.Join(m.filePath, relPath), nil
|
||||
}
|
||||
|
||||
func (m *translatePaths) translateJobTask(b *bundle.Bundle, task *jobs.JobTaskSettings) error {
|
||||
var err error
|
||||
|
||||
if task.NotebookTask != nil {
|
||||
err = m.rewritePath(b, &task.NotebookTask.NotebookPath, m.translateNotebookPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if task.SparkPythonTask != nil {
|
||||
err = m.rewritePath(b, &task.SparkPythonTask.PythonFile, m.translateFilePath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *translatePaths) translatePipelineLibrary(b *bundle.Bundle, library *pipelines.PipelineLibrary) error {
|
||||
var err error
|
||||
|
||||
if library.Notebook != nil {
|
||||
err = m.rewritePath(b, &library.Notebook.Path, m.translateNotebookPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *translatePaths) Apply(_ context.Context, b *bundle.Bundle) ([]bundle.Mutator, error) {
|
||||
m.seen = make(map[string]string)
|
||||
m.filePath = b.Config.Workspace.FilePath.Workspace
|
||||
|
||||
for _, job := range b.Config.Resources.Jobs {
|
||||
for i := 0; i < len(job.Tasks); i++ {
|
||||
err := m.translateJobTask(b, &job.Tasks[i])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for _, pipeline := range b.Config.Resources.Pipelines {
|
||||
for i := 0; i < len(pipeline.Libraries); i++ {
|
||||
err := m.translatePipelineLibrary(b, &pipeline.Libraries[i])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil, nil
|
||||
}
|
|
@ -16,21 +16,33 @@ import (
|
|||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func touchFile(t *testing.T, path string) {
|
||||
func touchNotebookFile(t *testing.T, path string) {
|
||||
f, err := os.Create(path)
|
||||
require.NoError(t, err)
|
||||
f.WriteString("# Databricks notebook source\n")
|
||||
f.Close()
|
||||
}
|
||||
|
||||
func TestNotebookPaths(t *testing.T) {
|
||||
func touchEmptyFile(t *testing.T, path string) {
|
||||
f, err := os.Create(path)
|
||||
require.NoError(t, err)
|
||||
f.Close()
|
||||
}
|
||||
|
||||
func TestTranslatePaths(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
touchFile(t, filepath.Join(dir, "my_job_notebook.py"))
|
||||
touchFile(t, filepath.Join(dir, "my_pipeline_notebook.py"))
|
||||
touchNotebookFile(t, filepath.Join(dir, "my_job_notebook.py"))
|
||||
touchNotebookFile(t, filepath.Join(dir, "my_pipeline_notebook.py"))
|
||||
touchEmptyFile(t, filepath.Join(dir, "my_python_file.py"))
|
||||
|
||||
bundle := &bundle.Bundle{
|
||||
Config: config.Root{
|
||||
Path: dir,
|
||||
Workspace: config.Workspace{
|
||||
FilePath: config.PathLike{
|
||||
Workspace: "/bundle",
|
||||
},
|
||||
},
|
||||
Resources: config.Resources{
|
||||
Jobs: map[string]*resources.Job{
|
||||
"job": {
|
||||
|
@ -56,6 +68,11 @@ func TestNotebookPaths(t *testing.T) {
|
|||
PackageName: "foo",
|
||||
},
|
||||
},
|
||||
{
|
||||
SparkPythonTask: &jobs.SparkPythonTask{
|
||||
PythonFile: "./my_python_file.py",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
|
@ -90,13 +107,13 @@ func TestNotebookPaths(t *testing.T) {
|
|||
},
|
||||
}
|
||||
|
||||
_, err := mutator.TranslateNotebookPaths().Apply(context.Background(), bundle)
|
||||
_, err := mutator.TranslatePaths().Apply(context.Background(), bundle)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Assert that the path in the tasks now refer to the artifact.
|
||||
assert.Equal(
|
||||
t,
|
||||
"${workspace.file_path.workspace}/my_job_notebook",
|
||||
"/bundle/my_job_notebook",
|
||||
bundle.Config.Resources.Jobs["job"].Tasks[0].NotebookTask.NotebookPath,
|
||||
)
|
||||
assert.Equal(
|
||||
|
@ -106,14 +123,19 @@ func TestNotebookPaths(t *testing.T) {
|
|||
)
|
||||
assert.Equal(
|
||||
t,
|
||||
"${workspace.file_path.workspace}/my_job_notebook",
|
||||
"/bundle/my_job_notebook",
|
||||
bundle.Config.Resources.Jobs["job"].Tasks[2].NotebookTask.NotebookPath,
|
||||
)
|
||||
assert.Equal(
|
||||
t,
|
||||
"/bundle/my_python_file.py",
|
||||
bundle.Config.Resources.Jobs["job"].Tasks[4].SparkPythonTask.PythonFile,
|
||||
)
|
||||
|
||||
// Assert that the path in the libraries now refer to the artifact.
|
||||
assert.Equal(
|
||||
t,
|
||||
"${workspace.file_path.workspace}/my_pipeline_notebook",
|
||||
"/bundle/my_pipeline_notebook",
|
||||
bundle.Config.Resources.Pipelines["pipeline"].Libraries[0].Notebook.Path,
|
||||
)
|
||||
assert.Equal(
|
||||
|
@ -123,7 +145,7 @@ func TestNotebookPaths(t *testing.T) {
|
|||
)
|
||||
assert.Equal(
|
||||
t,
|
||||
"${workspace.file_path.workspace}/my_pipeline_notebook",
|
||||
"/bundle/my_pipeline_notebook",
|
||||
bundle.Config.Resources.Pipelines["pipeline"].Libraries[2].Notebook.Path,
|
||||
)
|
||||
}
|
||||
|
@ -152,8 +174,36 @@ func TestJobNotebookDoesNotExistError(t *testing.T) {
|
|||
},
|
||||
}
|
||||
|
||||
_, err := mutator.TranslateNotebookPaths().Apply(context.Background(), bundle)
|
||||
assert.ErrorContains(t, err, "notebook ./doesnt_exist.py not found")
|
||||
_, err := mutator.TranslatePaths().Apply(context.Background(), bundle)
|
||||
assert.EqualError(t, err, "notebook ./doesnt_exist.py not found")
|
||||
}
|
||||
|
||||
func TestJobFileDoesNotExistError(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
|
||||
bundle := &bundle.Bundle{
|
||||
Config: config.Root{
|
||||
Path: dir,
|
||||
Resources: config.Resources{
|
||||
Jobs: map[string]*resources.Job{
|
||||
"job": {
|
||||
JobSettings: &jobs.JobSettings{
|
||||
Tasks: []jobs.JobTaskSettings{
|
||||
{
|
||||
SparkPythonTask: &jobs.SparkPythonTask{
|
||||
PythonFile: "./doesnt_exist.py",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
_, err := mutator.TranslatePaths().Apply(context.Background(), bundle)
|
||||
assert.EqualError(t, err, "file ./doesnt_exist.py not found")
|
||||
}
|
||||
|
||||
func TestPipelineNotebookDoesNotExistError(t *testing.T) {
|
||||
|
@ -180,6 +230,6 @@ func TestPipelineNotebookDoesNotExistError(t *testing.T) {
|
|||
},
|
||||
}
|
||||
|
||||
_, err := mutator.TranslateNotebookPaths().Apply(context.Background(), bundle)
|
||||
assert.ErrorContains(t, err, "notebook ./doesnt_exist.py not found")
|
||||
_, err := mutator.TranslatePaths().Apply(context.Background(), bundle)
|
||||
assert.EqualError(t, err, "notebook ./doesnt_exist.py not found")
|
||||
}
|
|
@ -18,11 +18,11 @@ func Initialize() bundle.Mutator {
|
|||
mutator.DefineDefaultWorkspaceRoot(),
|
||||
mutator.ExpandWorkspaceRoot(),
|
||||
mutator.DefineDefaultWorkspacePaths(),
|
||||
mutator.TranslateNotebookPaths(),
|
||||
interpolation.Interpolate(
|
||||
interpolation.IncludeLookupsInPath("bundle"),
|
||||
interpolation.IncludeLookupsInPath("workspace"),
|
||||
),
|
||||
mutator.TranslatePaths(),
|
||||
terraform.Initialize(),
|
||||
},
|
||||
)
|
||||
|
|
Loading…
Reference in New Issue