mirror of https://github.com/databricks/cli.git
Mutator to convert paths to local notebook files into artifacts (#144)
This lets you write: ```yaml libraries: - notebook: path: ./events.sql ``` Instead of: ```yaml artifacts: events_sql: notebook: path: ./events.sql libraries: - notebook: path: "${artifacts.events_sql.notebook.remote_path}" ```
This commit is contained in:
parent
1a9a431b97
commit
4026b2cda2
|
@ -0,0 +1,88 @@
|
|||
package mutator
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
|
||||
"github.com/databricks/bricks/bundle"
|
||||
"github.com/databricks/bricks/bundle/config"
|
||||
)
|
||||
|
||||
// translateNotebookPaths holds the per-run state of the mutator returned by
// TranslateNotebookPaths.
type translateNotebookPaths struct {
	// seen maps an artifact identifier to the interpolation reference that
	// was substituted for it, so a notebook referenced more than once is
	// registered as an artifact only once. Apply resets it on every run.
	seen map[string]string
}
|
||||
|
||||
// TranslateNotebookPaths converts paths to local notebook files into references to artifacts.
|
||||
func TranslateNotebookPaths() bundle.Mutator {
|
||||
return &translateNotebookPaths{}
|
||||
}
|
||||
|
||||
func (m *translateNotebookPaths) Name() string {
|
||||
return "TranslateNotebookPaths"
|
||||
}
|
||||
|
||||
// nonWord matches any single character outside the regexp word class
// ([0-9A-Za-z_]). It is used to turn a relative path into an artifact key.
var nonWord = regexp.MustCompile(`[^\w]`)
|
||||
|
||||
func (m *translateNotebookPaths) rewritePath(b *bundle.Bundle, p *string) {
|
||||
relPath := path.Clean(*p)
|
||||
absPath := filepath.Join(b.Config.Path, relPath)
|
||||
|
||||
// This is opportunistic. If we can't stat, continue.
|
||||
_, err := os.Stat(absPath)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
// Define artifact for this notebook.
|
||||
id := nonWord.ReplaceAllString(relPath, "_")
|
||||
if v, ok := m.seen[id]; ok {
|
||||
*p = v
|
||||
return
|
||||
}
|
||||
|
||||
b.Config.Artifacts[id] = &config.Artifact{
|
||||
Notebook: &config.NotebookArtifact{
|
||||
Path: relPath,
|
||||
},
|
||||
}
|
||||
|
||||
interp := fmt.Sprintf("${artifacts.%s.notebook.remote_path}", id)
|
||||
*p = interp
|
||||
m.seen[id] = interp
|
||||
}
|
||||
|
||||
func (m *translateNotebookPaths) Apply(_ context.Context, b *bundle.Bundle) ([]bundle.Mutator, error) {
|
||||
m.seen = make(map[string]string)
|
||||
|
||||
if b.Config.Artifacts == nil {
|
||||
b.Config.Artifacts = make(map[string]*config.Artifact)
|
||||
}
|
||||
|
||||
for _, job := range b.Config.Resources.Jobs {
|
||||
for i := 0; i < len(job.Tasks); i++ {
|
||||
task := &job.Tasks[i]
|
||||
if task.NotebookTask == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
m.rewritePath(b, &task.NotebookTask.NotebookPath)
|
||||
}
|
||||
}
|
||||
|
||||
for _, pipeline := range b.Config.Resources.Pipelines {
|
||||
for i := 0; i < len(pipeline.Libraries); i++ {
|
||||
library := &pipeline.Libraries[i]
|
||||
if library.Notebook == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
m.rewritePath(b, &library.Notebook.Path)
|
||||
}
|
||||
}
|
||||
|
||||
return nil, nil
|
||||
}
|
|
@ -0,0 +1,134 @@
|
|||
package mutator_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/databricks/bricks/bundle"
|
||||
"github.com/databricks/bricks/bundle/config"
|
||||
"github.com/databricks/bricks/bundle/config/mutator"
|
||||
"github.com/databricks/bricks/bundle/config/resources"
|
||||
"github.com/databricks/databricks-sdk-go/service/jobs"
|
||||
"github.com/databricks/databricks-sdk-go/service/pipelines"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func touchFile(t *testing.T, path string) {
|
||||
f, err := os.Create(path)
|
||||
require.NoError(t, err)
|
||||
f.Close()
|
||||
}
|
||||
|
||||
func TestNotebookPaths(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
touchFile(t, filepath.Join(dir, "my_job_notebook.py"))
|
||||
touchFile(t, filepath.Join(dir, "my_pipeline_notebook.py"))
|
||||
|
||||
bundle := &bundle.Bundle{
|
||||
Config: config.Root{
|
||||
Path: dir,
|
||||
Resources: config.Resources{
|
||||
Jobs: map[string]*resources.Job{
|
||||
"job": {
|
||||
JobSettings: &jobs.JobSettings{
|
||||
Tasks: []jobs.JobTaskSettings{
|
||||
{
|
||||
NotebookTask: &jobs.NotebookTask{
|
||||
NotebookPath: "./my_job_notebook.py",
|
||||
},
|
||||
},
|
||||
{
|
||||
NotebookTask: &jobs.NotebookTask{
|
||||
NotebookPath: "./doesnt_exist.py",
|
||||
},
|
||||
},
|
||||
{
|
||||
NotebookTask: &jobs.NotebookTask{
|
||||
NotebookPath: "./my_job_notebook.py",
|
||||
},
|
||||
},
|
||||
{
|
||||
PythonWheelTask: &jobs.PythonWheelTask{
|
||||
PackageName: "foo",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
Pipelines: map[string]*resources.Pipeline{
|
||||
"pipeline": {
|
||||
PipelineSpec: &pipelines.PipelineSpec{
|
||||
Libraries: []pipelines.PipelineLibrary{
|
||||
{
|
||||
Notebook: &pipelines.NotebookLibrary{
|
||||
Path: "./my_pipeline_notebook.py",
|
||||
},
|
||||
},
|
||||
{
|
||||
Notebook: &pipelines.NotebookLibrary{
|
||||
Path: "./doesnt_exist.py",
|
||||
},
|
||||
},
|
||||
{
|
||||
Notebook: &pipelines.NotebookLibrary{
|
||||
Path: "./my_pipeline_notebook.py",
|
||||
},
|
||||
},
|
||||
{
|
||||
Jar: "foo",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
_, err := mutator.TranslateNotebookPaths().Apply(context.Background(), bundle)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Assert that the notebook artifact was defined.
|
||||
assert.Len(t, bundle.Config.Artifacts, 2)
|
||||
for _, artifact := range bundle.Config.Artifacts {
|
||||
assert.Contains(t, artifact.Notebook.Path, "notebook.py")
|
||||
}
|
||||
|
||||
// Assert that the path in the tasks now refer to the artifact.
|
||||
assert.Equal(
|
||||
t,
|
||||
"${artifacts.my_job_notebook_py.notebook.remote_path}",
|
||||
bundle.Config.Resources.Jobs["job"].Tasks[0].NotebookTask.NotebookPath,
|
||||
)
|
||||
assert.Equal(
|
||||
t,
|
||||
"./doesnt_exist.py",
|
||||
bundle.Config.Resources.Jobs["job"].Tasks[1].NotebookTask.NotebookPath,
|
||||
)
|
||||
assert.Equal(
|
||||
t,
|
||||
"${artifacts.my_job_notebook_py.notebook.remote_path}",
|
||||
bundle.Config.Resources.Jobs["job"].Tasks[2].NotebookTask.NotebookPath,
|
||||
)
|
||||
|
||||
// Assert that the path in the libraries now refer to the artifact.
|
||||
assert.Equal(
|
||||
t,
|
||||
"${artifacts.my_pipeline_notebook_py.notebook.remote_path}",
|
||||
bundle.Config.Resources.Pipelines["pipeline"].Libraries[0].Notebook.Path,
|
||||
)
|
||||
assert.Equal(
|
||||
t,
|
||||
"./doesnt_exist.py",
|
||||
bundle.Config.Resources.Pipelines["pipeline"].Libraries[1].Notebook.Path,
|
||||
)
|
||||
assert.Equal(
|
||||
t,
|
||||
"${artifacts.my_pipeline_notebook_py.notebook.remote_path}",
|
||||
bundle.Config.Resources.Pipelines["pipeline"].Libraries[2].Notebook.Path,
|
||||
)
|
||||
}
|
|
@ -16,6 +16,7 @@ func Initialize() bundle.Mutator {
|
|||
[]bundle.Mutator{
|
||||
mutator.PopulateCurrentUser(),
|
||||
mutator.DefaultArtifactPath(),
|
||||
mutator.TranslateNotebookPaths(),
|
||||
interpolation.Interpolate(
|
||||
interpolation.IncludeLookupsInPath("bundle"),
|
||||
interpolation.IncludeLookupsInPath("workspace"),
|
||||
|
|
Loading…
Reference in New Issue