Mutator to convert paths to local notebook files into artifacts (#144)

This lets you write:
```yaml
libraries:
  - notebook:
      path: ./events.sql
```

Instead of:
```yaml
artifacts:
  events_sql:
    notebook:
      path: ./events.sql

libraries:
  - notebook:
      path: "${artifacts.events_sql.notebook.remote_path}"
```
This commit is contained in:
Pieter Noordhuis 2022-12-16 14:49:23 +01:00 committed by GitHub
parent 1a9a431b97
commit 4026b2cda2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 223 additions and 0 deletions

View File

@ -0,0 +1,88 @@
package mutator
import (
"context"
"fmt"
"os"
"path"
"path/filepath"
"regexp"
"github.com/databricks/bricks/bundle"
"github.com/databricks/bricks/bundle/config"
)
// translateNotebookPaths is the mutator returned by TranslateNotebookPaths.
type translateNotebookPaths struct {
// seen remembers the interpolation reference generated for each notebook
// handled so far, so that repeated references to one notebook share a
// single artifact. It is re-created at the start of every Apply call.
seen map[string]string
}
// TranslateNotebookPaths returns a mutator that converts paths to local
// notebook files into references to artifacts.
func TranslateNotebookPaths() bundle.Mutator {
	return new(translateNotebookPaths)
}
// Name returns the name of this mutator.
func (m *translateNotebookPaths) Name() string {
return "TranslateNotebookPaths"
}
// nonWord matches any single character that is not a regexp word character
// (ASCII letter, digit, or underscore). rewritePath replaces every match with
// "_" to turn a notebook path into an artifact identifier.
var nonWord = regexp.MustCompile(`[^\w]`)
// rewritePath replaces the notebook path pointed to by p with an interpolation
// reference to a notebook artifact, registering that artifact in the bundle
// configuration when the notebook has not been handled before.
//
// The path is cleaned and resolved relative to b.Config.Path. If the resulting
// file cannot be stat'ed, *p is left untouched.
//
// Each distinct cleaned path gets its own artifact. The artifact identifier is
// the cleaned path with every non-word character replaced by "_"; when that
// identifier is already present in b.Config.Artifacts (because a different
// path sanitizes to the same identifier, or because the configuration already
// defines an artifact under that name) a numeric suffix ("_2", "_3", ...) is
// appended until an unused identifier is found. Existing artifacts are never
// overwritten.
func (m *translateNotebookPaths) rewritePath(b *bundle.Bundle, p *string) {
	relPath := path.Clean(*p)
	absPath := filepath.Join(b.Config.Path, relPath)

	// This is opportunistic. If we can't stat, continue.
	if _, err := os.Stat(absPath); err != nil {
		return
	}

	// Repeated references to the same notebook share one artifact. The cache
	// is keyed by the cleaned path rather than the sanitized identifier so
	// that "a/b.py" and "a_b.py" are not mistaken for each other.
	if ref, ok := m.seen[relPath]; ok {
		*p = ref
		return
	}

	// Find an identifier that is not taken yet.
	base := nonWord.ReplaceAllString(relPath, "_")
	id := base
	for n := 2; ; n++ {
		if _, taken := b.Config.Artifacts[id]; !taken {
			break
		}
		id = fmt.Sprintf("%s_%d", base, n)
	}

	// Writing to a nil map panics; make sure both maps exist even when this
	// method is reached without Apply having prepared them.
	if b.Config.Artifacts == nil {
		b.Config.Artifacts = make(map[string]*config.Artifact)
	}
	if m.seen == nil {
		m.seen = make(map[string]string)
	}

	// Define artifact for this notebook.
	b.Config.Artifacts[id] = &config.Artifact{
		Notebook: &config.NotebookArtifact{
			Path: relPath,
		},
	}

	ref := fmt.Sprintf("${artifacts.%s.notebook.remote_path}", id)
	*p = ref
	m.seen[relPath] = ref
}
// Apply walks every job task and pipeline library in the bundle configuration
// and passes each notebook path to rewritePath, which swaps paths to existing
// local files for references to notebook artifacts.
//
// The cache of already translated notebooks is reset on every call, and
// b.Config.Artifacts is created if it is nil. Job and pipeline entries that
// are nil, or whose embedded settings are nil, are skipped. Apply returns no
// follow-up mutators and never returns an error.
func (m *translateNotebookPaths) Apply(_ context.Context, b *bundle.Bundle) ([]bundle.Mutator, error) {
	m.seen = make(map[string]string)

	if b.Config.Artifacts == nil {
		b.Config.Artifacts = make(map[string]*config.Artifact)
	}

	for _, job := range b.Config.Resources.Jobs {
		// Tasks is reached through the embedded *jobs.JobSettings; guard
		// against a nil dereference for empty job definitions.
		if job == nil || job.JobSettings == nil {
			continue
		}
		for i := range job.Tasks {
			// Take the address of the slice element so the rewrite is
			// visible in the configuration.
			task := &job.Tasks[i]
			if task.NotebookTask == nil {
				continue
			}
			m.rewritePath(b, &task.NotebookTask.NotebookPath)
		}
	}

	for _, pipeline := range b.Config.Resources.Pipelines {
		// Libraries is reached through the embedded *pipelines.PipelineSpec.
		if pipeline == nil || pipeline.PipelineSpec == nil {
			continue
		}
		for i := range pipeline.Libraries {
			library := &pipeline.Libraries[i]
			if library.Notebook == nil {
				continue
			}
			m.rewritePath(b, &library.Notebook.Path)
		}
	}

	return nil, nil
}

View File

@ -0,0 +1,134 @@
package mutator_test
import (
"context"
"os"
"path/filepath"
"testing"
"github.com/databricks/bricks/bundle"
"github.com/databricks/bricks/bundle/config"
"github.com/databricks/bricks/bundle/config/mutator"
"github.com/databricks/bricks/bundle/config/resources"
"github.com/databricks/databricks-sdk-go/service/jobs"
"github.com/databricks/databricks-sdk-go/service/pipelines"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// touchFile creates an empty file at path (truncating it if it already
// exists) and fails the test immediately if the file cannot be created or
// closed.
func touchFile(t *testing.T, path string) {
	// Report failures at the caller's line rather than inside this helper.
	t.Helper()

	f, err := os.Create(path)
	require.NoError(t, err)
	require.NoError(t, f.Close())
}
// TestNotebookPaths checks that TranslateNotebookPaths turns paths to existing
// local notebooks into artifact references, defines one artifact per distinct
// notebook, and leaves paths to missing files and non-notebook entries alone.
func TestNotebookPaths(t *testing.T) {
	root := t.TempDir()
	for _, name := range []string{"my_job_notebook.py", "my_pipeline_notebook.py"} {
		touchFile(t, filepath.Join(root, name))
	}

	// One job with a duplicated notebook, a missing notebook and a
	// non-notebook task.
	tasks := []jobs.JobTaskSettings{
		{NotebookTask: &jobs.NotebookTask{NotebookPath: "./my_job_notebook.py"}},
		{NotebookTask: &jobs.NotebookTask{NotebookPath: "./doesnt_exist.py"}},
		{NotebookTask: &jobs.NotebookTask{NotebookPath: "./my_job_notebook.py"}},
		{PythonWheelTask: &jobs.PythonWheelTask{PackageName: "foo"}},
	}

	// One pipeline with a duplicated notebook, a missing notebook and a
	// non-notebook library.
	libraries := []pipelines.PipelineLibrary{
		{Notebook: &pipelines.NotebookLibrary{Path: "./my_pipeline_notebook.py"}},
		{Notebook: &pipelines.NotebookLibrary{Path: "./doesnt_exist.py"}},
		{Notebook: &pipelines.NotebookLibrary{Path: "./my_pipeline_notebook.py"}},
		{Jar: "foo"},
	}

	b := &bundle.Bundle{
		Config: config.Root{
			Path: root,
			Resources: config.Resources{
				Jobs: map[string]*resources.Job{
					"job": {
						JobSettings: &jobs.JobSettings{Tasks: tasks},
					},
				},
				Pipelines: map[string]*resources.Pipeline{
					"pipeline": {
						PipelineSpec: &pipelines.PipelineSpec{Libraries: libraries},
					},
				},
			},
		},
	}

	_, err := mutator.TranslateNotebookPaths().Apply(context.Background(), b)
	require.NoError(t, err)

	// Assert that the notebook artifacts were defined.
	assert.Len(t, b.Config.Artifacts, 2)
	for _, artifact := range b.Config.Artifacts {
		assert.Contains(t, artifact.Notebook.Path, "notebook.py")
	}

	// Assert that the paths in the tasks now refer to the artifact.
	gotTasks := b.Config.Resources.Jobs["job"].Tasks
	assert.Equal(
		t,
		"${artifacts.my_job_notebook_py.notebook.remote_path}",
		gotTasks[0].NotebookTask.NotebookPath,
	)
	assert.Equal(
		t,
		"./doesnt_exist.py",
		gotTasks[1].NotebookTask.NotebookPath,
	)
	assert.Equal(
		t,
		"${artifacts.my_job_notebook_py.notebook.remote_path}",
		gotTasks[2].NotebookTask.NotebookPath,
	)

	// Assert that the paths in the libraries now refer to the artifact.
	gotLibraries := b.Config.Resources.Pipelines["pipeline"].Libraries
	assert.Equal(
		t,
		"${artifacts.my_pipeline_notebook_py.notebook.remote_path}",
		gotLibraries[0].Notebook.Path,
	)
	assert.Equal(
		t,
		"./doesnt_exist.py",
		gotLibraries[1].Notebook.Path,
	)
	assert.Equal(
		t,
		"${artifacts.my_pipeline_notebook_py.notebook.remote_path}",
		gotLibraries[2].Notebook.Path,
	)
}

View File

@ -16,6 +16,7 @@ func Initialize() bundle.Mutator {
[]bundle.Mutator{
mutator.PopulateCurrentUser(),
mutator.DefaultArtifactPath(),
mutator.TranslateNotebookPaths(),
interpolation.Interpolate(
interpolation.IncludeLookupsInPath("bundle"),
interpolation.IncludeLookupsInPath("workspace"),